[v3,14/17] tracing: Add kho serialization of trace events

Message ID 20240117144704.602-15-graf@amazon.com
State New
Headers
Series kexec: Allow preservation of ftrace buffers |

Commit Message

Alexander Graf Jan. 17, 2024, 2:47 p.m. UTC
  Events in ftrace, and thus the handles used to parse them, have dynamic
IDs that are assigned whenever an event is added to the system. If we want
to parse trace events after kexec, we need to link event IDs back to the
original trace events that existed before we kexec'ed.

There are broadly 2 paths we could take for that:

  1) Save full event description across KHO, restore after kexec,
     merge identical trace events into a single identifier.
  2) Recover the ID of post-kexec added events so they get the same
     ID after kexec that they had before kexec

This patch implements the second option. It's simpler and thus less
intrusive. However, it means we cannot fully parse affected events
when the kernel removes or modifies trace events across a KHO kexec.

Signed-off-by: Alexander Graf <graf@amazon.com>

---

v1 -> v2:

  - Leave anything that requires a name in trace.c to keep buffers
    unnamed entities
  - Put events as array into a property, use fingerprint instead of
    names to identify them
  - Reduce footprint without CONFIG_FTRACE_KHO

v2 -> v3:

  - s/"global_trace"/"global-trace"/
---
 kernel/trace/trace.c        |  3 +-
 kernel/trace/trace_output.c | 89 +++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_output.h |  5 +++
 3 files changed, 96 insertions(+), 1 deletion(-)
  

Comments

kernel test robot Jan. 18, 2024, 5:23 a.m. UTC | #1
Hi Alexander,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[cannot apply to tip/x86/core arm64/for-next/core akpm-mm/mm-everything v6.7 next-20240117]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Alexander-Graf/mm-memblock-Add-support-for-scratch-memory/20240117-225136
base:   linus/master
patch link:    https://lore.kernel.org/r/20240117144704.602-15-graf%40amazon.com
patch subject: [PATCH v3 14/17] tracing: Add kho serialization of trace events
config: i386-randconfig-141-20240118 (https://download.01.org/0day-ci/archive/20240118/202401181352.qC85XHgx-lkp@intel.com/config)
compiler: ClangBuiltLinux clang version 17.0.6 (https://github.com/llvm/llvm-project 6009708b4367171ccdbf4b5905cb6a803753fe18)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240118/202401181352.qC85XHgx-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202401181352.qC85XHgx-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> kernel/trace/trace_output.c:731:12: warning: unsequenced modification and access to 'count' [-Wunsequenced]
     731 |                 map[count++] = (struct trace_event_map) {
         |                          ^
     732 |                         .crc32 = count,
         |                                  ~~~~~
   1 warning generated.


vim +/count +731 kernel/trace/trace_output.c

   710	
   711	static int __maybe_unused _trace_kho_write_events(void *fdt)
   712	{
   713		struct trace_event_call *call;
   714		int count = __TRACE_LAST_TYPE - 1;
   715		struct trace_event_map *map;
   716		int err = 0;
   717		int i;
   718	
   719		down_read(&trace_event_sem);
   720		/* Allocate an array that we can place all maps into */
   721		list_for_each_entry(call, &ftrace_events, list)
   722			count++;
   723	
   724		map = vmalloc(count * sizeof(*map));
   725		if (!map)
   726			return -ENOMEM;
   727	
   728		/* Then fill the array with all crc32 values */
   729		count = 0;
   730		for (i = 1; i < __TRACE_LAST_TYPE; i++)
 > 731			map[count++] = (struct trace_event_map) {
   732				.crc32 = count,
   733				.type = count,
   734			};
   735	
   736		list_for_each_entry(call, &ftrace_events, list) {
   737			struct trace_event *event = &call->event;
   738	
   739			map[count++] = (struct trace_event_map) {
   740				.crc32 = event2fp(event),
   741				.type = event->type,
   742			};
   743		}
   744		up_read(&trace_event_sem);
   745	
   746		/* And finally write it into a DT variable */
   747		err |= fdt_property(fdt, "events", map, count * sizeof(*map));
   748	
   749		vfree(map);
   750		return err;
   751	}
   752
  

Patch

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a5d7f5b4c19f..b5a6a2115b75 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9364,7 +9364,7 @@  init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
 
 static int trace_kho_off_tr(struct trace_array *tr)
 {
-	const char *name = tr->name ? tr->name : "global_trace";
+	const char *name = tr->name ? tr->name : "global-trace";
 	const void *fdt = kho_get_fdt();
 	char *path;
 	int off;
@@ -10648,6 +10648,7 @@  static int trace_kho_notifier(struct notifier_block *self,
 
 	err |= fdt_begin_node(fdt, "ftrace");
 	err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible));
+	err |= trace_kho_write_events(fdt);
 	err |= trace_kho_write_trace_array(fdt, &global_trace);
 	err |= fdt_end_node(fdt);
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3e7fa44dc2b2..7d8815352e20 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -12,6 +12,8 @@ 
 #include <linux/sched/clock.h>
 #include <linux/sched/mm.h>
 #include <linux/idr.h>
+#include <linux/kexec.h>
+#include <linux/crc32.h>
 
 #include "trace_output.h"
 
@@ -669,6 +671,93 @@  int trace_print_lat_context(struct trace_iterator *iter)
 	return !trace_seq_has_overflowed(s);
 }
 
+/**
+ * event2fp - Return fingerprint of an event
+ * @event: The event to fingerprint
+ *
+ * For KHO, we need to match events before and after kexec to recover their
+ * type ids. For events with a nonzero type below __TRACE_LAST_TYPE the type
+ * id itself serves as the fingerprint. For all other events this function
+ * returns a CRC32 (seeded with ~0) over the event's name, if it has one,
+ * followed by the raw bytes of each field's size, in the order in which the
+ * list returned by trace_get_fields() is walked.
+ *
+ * Return: the type id for low (static) types, the CRC32 otherwise.
+ */
+static u32 event2fp(struct trace_event *event)
+{
+	struct ftrace_event_field *field;
+	struct trace_event_call *call;
+	struct list_head *head;
+	const char *name;
+	u32 crc32 = ~0;
+
+	/* Low type numbers are static, nothing to checksum */
+	if (event->type && event->type < __TRACE_LAST_TYPE)
+		return event->type;
+
+	/* The trace_event is embedded in its trace_event_call; get the owner */
+	call = container_of(event, struct trace_event_call, event);
+	name = trace_event_name(call);
+	if (name)
+		crc32 = crc32_le(crc32, name, strlen(name));
+
+	head = trace_get_fields(call);
+	list_for_each_entry(field, head, link)
+		crc32 = crc32_le(crc32, (char *)&field->size, sizeof(field->size));
+
+	return crc32;
+}
+
+struct trace_event_map {
+	u32 crc32;	/* fingerprint of the event (event2fp() for listed events) */
+	u32 type;	/* type id the event had when the entry was recorded */
+};
+
+/*
+ * _trace_kho_write_events - store the fingerprint -> type id table in the FDT
+ * @fdt: flattened device tree that is currently being written
+ *
+ * Builds an array of struct trace_event_map holding one entry per static
+ * type id (1 .. __TRACE_LAST_TYPE - 1, fingerprint == type id) followed by
+ * one entry per event on the ftrace_events list, and writes the array as
+ * the "events" property.
+ *
+ * Returns 0 on success, -ENOMEM if the temporary array cannot be allocated,
+ * or the error returned by fdt_property().
+ */
+static int __maybe_unused _trace_kho_write_events(void *fdt)
+{
+	struct trace_event_call *call;
+	int count = __TRACE_LAST_TYPE - 1;
+	struct trace_event_map *map;
+	int err;
+	int i;
+
+	down_read(&trace_event_sem);
+	/* Allocate an array that we can place all maps into */
+	list_for_each_entry(call, &ftrace_events, list)
+		count++;
+
+	map = vmalloc(count * sizeof(*map));
+	if (!map) {
+		/* Release the semaphore on the error path so it is not leaked */
+		up_read(&trace_event_sem);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Then fill the array with all crc32 values. Static types use their
+	 * type id as fingerprint. Take the values from the loop variable:
+	 * reading 'count' in the initializer while 'count++' indexes the same
+	 * assignment is unsequenced (-Wunsequenced).
+	 */
+	count = 0;
+	for (i = 1; i < __TRACE_LAST_TYPE; i++) {
+		map[count++] = (struct trace_event_map) {
+			.crc32 = i,
+			.type = i,
+		};
+	}
+
+	list_for_each_entry(call, &ftrace_events, list) {
+		struct trace_event *event = &call->event;
+
+		map[count++] = (struct trace_event_map) {
+			.crc32 = event2fp(event),
+			.type = event->type,
+		};
+	}
+	up_read(&trace_event_sem);
+
+	/* And finally write it into a DT variable */
+	err = fdt_property(fdt, "events", map, count * sizeof(*map));
+
+	vfree(map);
+	return err;
+}
+
+#ifdef CONFIG_FTRACE_KHO
+/*
+ * Non-static entry point, only built with CONFIG_FTRACE_KHO. It forwards
+ * to _trace_kho_write_events() and returns its result unchanged.
+ */
+int trace_kho_write_events(void *fdt)
+{
+	return _trace_kho_write_events(fdt);
+}
+#endif /* CONFIG_FTRACE_KHO */
+
+
 /**
  * ftrace_find_event - find a registered event
  * @type: the type of event to look for
diff --git a/kernel/trace/trace_output.h b/kernel/trace/trace_output.h
index dca40f1f1da4..07481f295436 100644
--- a/kernel/trace/trace_output.h
+++ b/kernel/trace/trace_output.h
@@ -25,6 +25,11 @@  extern enum print_line_t print_event_fields(struct trace_iterator *iter,
 extern void trace_event_read_lock(void);
 extern void trace_event_read_unlock(void);
 extern struct trace_event *ftrace_find_event(int type);
+#ifdef CONFIG_FTRACE_KHO
+extern int trace_kho_write_events(void *fdt);
+#else
+/* Stub used without CONFIG_FTRACE_KHO: always returns -EINVAL */
+static inline int trace_kho_write_events(void *fdt) { return -EINVAL; }
+#endif /* CONFIG_FTRACE_KHO */
 
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
 					 int flags, struct trace_event *event);