[RFC,2/5] perf: Add fork to the sideband ioctl

Message ID 20230414082300.34798-3-adrian.hunter@intel.com
State New
Series perf: Add ioctl to emit sideband events

Commit Message

Adrian Hunter April 14, 2023, 8:22 a.m. UTC
Support the case of output directly to an active event, and return an error
if output is not possible in that case. Set PERF_RECORD_MISC_STATUS_ONLY to
differentiate the ioctl's status-only sideband event from a "real" sideband
event.
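
For orientation, user-space use of the new ioctl might look something like the
sketch below. It is only an illustration: the request name
PERF_EVENT_IOC_EMIT_SIDEBAND, its ioctl number, the PERF_RECORD_FORK argument
and the emit_fork_sideband() helper are placeholders for whatever patch 1/5
actually defines, and perf_fd is assumed to be an event opened with
perf_event_open() with attr.task set.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>

#ifndef PERF_EVENT_IOC_EMIT_SIDEBAND
/* Placeholder: the real request name and number come from patch 1/5 */
#define PERF_EVENT_IOC_EMIT_SIDEBAND	_IOW('$', 12, __u32)
#endif

static int emit_fork_sideband(int perf_fd)
{
	/*
	 * Ask the kernel to write a status-only PERF_RECORD_FORK into this
	 * event's ring buffer.  The event must be active on the current CPU
	 * for direct output, so the call can legitimately fail, e.g. with
	 * ENOENT, and the caller has to handle that.
	 */
	if (ioctl(perf_fd, PERF_EVENT_IOC_EMIT_SIDEBAND, PERF_RECORD_FORK) < 0) {
		fprintf(stderr, "emit sideband: %s\n", strerror(errno));
		return -1;
	}
	return 0;
}

Whether the request takes a record type, a pid, or a structure is up to
patch 1/5; only the error-handling pattern is the point here.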

Set the fork event's parent pid/tid to the real parent for a thread group
leader, or to the thread group leader otherwise.
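
On the consumer side, a status-only fork can be told apart from a live
PERF_RECORD_FORK by the misc bit, and its ppid/ptid then follow the rule
above. Another hedged sketch: PERF_RECORD_MISC_STATUS_ONLY is introduced by
this series rather than by current UAPI headers, so the value below is a
stand-in, and struct fork_record simply mirrors the documented
PERF_RECORD_FORK layout (minus the trailing sample_id).

#include <stdbool.h>
#include <linux/types.h>
#include <linux/perf_event.h>

#ifndef PERF_RECORD_MISC_STATUS_ONLY
/* Stand-in value; the real bit is defined earlier in this series */
#define PERF_RECORD_MISC_STATUS_ONLY	(1 << 13)
#endif

/* PERF_RECORD_FORK / PERF_RECORD_EXIT body (sample_id follows) */
struct fork_record {
	struct perf_event_header	header;
	__u32				pid, ppid;
	__u32				tid, ptid;
	__u64				time;
};

static bool is_status_only_fork(const struct fork_record *rec)
{
	/*
	 * Synthesized by the ioctl rather than by a real fork: ppid/ptid
	 * name the real parent for a thread group leader, or the thread
	 * group leader for any other thread.
	 */
	return rec->header.type == PERF_RECORD_FORK &&
	       (rec->header.misc & PERF_RECORD_MISC_STATUS_ONLY);
}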

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
 kernel/events/core.c | 88 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 73 insertions(+), 15 deletions(-)
  

Patch

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5cbcc6851587..4e76596d3bfb 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7948,6 +7948,54 @@  perf_iterate_sb(perf_iterate_f output, void *data,
 	rcu_read_unlock();
 }
 
+typedef int (perf_output_f)(struct perf_event *event, void *data);
+
+static int perf_event_output_sb(struct perf_event *event, perf_output_f output, void *data)
+{
+	int err = -ENOENT;
+
+	preempt_disable();
+
+	if (event->state != PERF_EVENT_STATE_ACTIVE ||
+	    !event_filter_match(event) ||
+	    READ_ONCE(event->oncpu) != smp_processor_id())
+		goto out;
+
+	err = output(event, data);
+out:
+	preempt_enable();
+	return err;
+}
+
+struct perf_output_f_data {
+	perf_output_f *func;
+	void *data;
+};
+
+void perf_output_f_wrapper(struct perf_event *event, void *data)
+{
+	struct perf_output_f_data *f_data = data;
+
+	f_data->func(event, f_data->data);
+}
+
+static int perf_output_sb(perf_output_f output, void *data,
+			  struct perf_event_context *task_ctx,
+			  struct perf_event *event)
+{
+	struct perf_output_f_data f_data = {
+		.func = output,
+		.data = data,
+	};
+
+	if (event)
+		return perf_event_output_sb(event, output, data);
+
+	perf_iterate_sb(perf_output_f_wrapper, &f_data, task_ctx);
+
+	return 0;
+}
+
 /*
  * Clear all file-based filters at exec, they'll have to be
  * re-instated when/if these objects are mmapped again.
@@ -8107,8 +8155,7 @@  static int perf_event_task_match(struct perf_event *event)
 	       event->attr.task;
 }
 
-static void perf_event_task_output(struct perf_event *event,
-				   void *data)
+static int perf_event_task_output(struct perf_event *event, void *data)
 {
 	struct perf_task_event *task_event = data;
 	struct perf_output_handle handle;
@@ -8117,7 +8164,7 @@  static void perf_event_task_output(struct perf_event *event,
 	int ret, size = task_event->event_id.header.size;
 
 	if (!perf_event_task_match(event))
-		return;
+		return -ENOENT;
 
 	perf_event_header__init_id(&task_event->event_id.header, &sample, event);
 
@@ -8134,6 +8181,14 @@  static void perf_event_task_output(struct perf_event *event,
 							task->real_parent);
 		task_event->event_id.ptid = perf_event_pid(event,
 							task->real_parent);
+	} else if (task_event->event_id.header.misc & PERF_RECORD_MISC_STATUS_ONLY) {
+		if (thread_group_leader(task)) {
+			task_event->event_id.ppid = perf_event_pid(event, task->real_parent);
+			task_event->event_id.ptid = perf_event_tid(event, task->real_parent);
+		} else {
+			task_event->event_id.ppid = perf_event_pid(event, task);
+			task_event->event_id.ptid = perf_event_pid(event, task);
+		}
 	} else {  /* PERF_RECORD_FORK */
 		task_event->event_id.ppid = perf_event_pid(event, current);
 		task_event->event_id.ptid = perf_event_tid(event, current);
@@ -8148,18 +8203,19 @@  static void perf_event_task_output(struct perf_event *event,
 	perf_output_end(&handle);
 out:
 	task_event->event_id.header.size = size;
+	return ret;
 }
 
-static void perf_event_task(struct task_struct *task,
-			      struct perf_event_context *task_ctx,
-			      int new)
+static int perf_event_task(struct task_struct *task,
+			   struct perf_event_context *task_ctx,
+			   int new, struct perf_event *event)
 {
 	struct perf_task_event task_event;
 
 	if (!atomic_read(&nr_comm_events) &&
 	    !atomic_read(&nr_mmap_events) &&
 	    !atomic_read(&nr_task_events))
-		return;
+		return -ENOENT;
 
 	task_event = (struct perf_task_event){
 		.task	  = task,
@@ -8167,7 +8223,7 @@  static void perf_event_task(struct task_struct *task,
 		.event_id    = {
 			.header = {
 				.type = new ? PERF_RECORD_FORK : PERF_RECORD_EXIT,
-				.misc = 0,
+				.misc = event ? PERF_RECORD_MISC_STATUS_ONLY : 0,
 				.size = sizeof(task_event.event_id),
 			},
 			/* .pid  */
@@ -8178,14 +8234,12 @@  static void perf_event_task(struct task_struct *task,
 		},
 	};
 
-	perf_iterate_sb(perf_event_task_output,
-		       &task_event,
-		       task_ctx);
+	return perf_output_sb(perf_event_task_output, &task_event, task_ctx, event);
 }
 
 void perf_event_fork(struct task_struct *task)
 {
-	perf_event_task(task, NULL, 1);
+	perf_event_task(task, NULL, 1, NULL);
 	perf_event_namespaces(task);
 }
 
@@ -12817,7 +12871,11 @@  EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
 
 static int perf_event_emit_fork(struct perf_event *event, struct task_struct *task)
 {
-	return -EINVAL;
+	if (!event->attr.comm && !event->attr.mmap && !event->attr.mmap2 &&
+	    !event->attr.mmap_data && !event->attr.task)
+		return -EINVAL;
+
+	return perf_event_task(task, NULL, 1, event);
 }
 
 static int perf_event_emit_namespaces(struct perf_event *event, struct task_struct *task)
@@ -13115,7 +13173,7 @@  static void perf_event_exit_task_context(struct task_struct *child)
 	 * won't get any samples after PERF_RECORD_EXIT. We can however still
 	 * get a few PERF_RECORD_READ events.
 	 */
-	perf_event_task(child, child_ctx, 0);
+	perf_event_task(child, child_ctx, 0, NULL);
 
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
 		perf_event_exit_event(child_event, child_ctx);
@@ -13157,7 +13215,7 @@  void perf_event_exit_task(struct task_struct *child)
 	 * child contexts and sets child->perf_event_ctxp[] to NULL.
 	 * At this point we need to send EXIT events to cpu contexts.
 	 */
-	perf_event_task(child, NULL, 0);
+	perf_event_task(child, NULL, 0, NULL);
 }
 
 static void perf_free_event(struct perf_event *event,