[v3,11/17] tracing: Introduce kho serialization

Message ID 20240117144704.602-12-graf@amazon.com
State New
Headers
Series kexec: Allow preservation of ftrace buffers

Commit Message

Alexander Graf Jan. 17, 2024, 2:46 p.m. UTC
We want to be able to transfer ftrace state from one kernel to the next.
To start off with, let's establish all the boilerplate to get a write
hook when KHO wants to serialize and fill out basic data.

Follow-up patches will fill in serialization of ring buffers and events.

Signed-off-by: Alexander Graf <graf@amazon.com>

---

v1 -> v2:

  - Remove ifdefs
---
 kernel/trace/trace.c | 47 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)
  

Comments

Pratyush Yadav Feb. 16, 2024, 3:36 p.m. UTC | #1
Hi,

On Wed, Jan 17 2024, Alexander Graf wrote:

> We want to be able to transfer ftrace state from one kernel to the next.
> To start off with, let's establish all the boiler plate to get a write
> hook when KHO wants to serialize and fill out basic data.
>
> Follow-up patches will fill in serialization of ring buffers and events.
>
> Signed-off-by: Alexander Graf <graf@amazon.com>
>
> ---
>
> v1 -> v2:
>
>   - Remove ifdefs
> ---
>  kernel/trace/trace.c | 47 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
>
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index a0defe156b57..9a0d96975c9c 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -32,6 +32,7 @@
>  #include <linux/percpu.h>
>  #include <linux/splice.h>
>  #include <linux/kdebug.h>
> +#include <linux/kexec.h>
>  #include <linux/string.h>
>  #include <linux/mount.h>
>  #include <linux/rwsem.h>
> @@ -866,6 +867,8 @@ static struct tracer		*trace_types __read_mostly;
>   */
>  DEFINE_MUTEX(trace_types_lock);
>
> +static bool trace_in_kho;
> +
>  /*
>   * serialize the access of the ring buffer
>   *
> @@ -10574,12 +10577,56 @@ void __init early_trace_init(void)
>  	init_events();
>  }
>
> +static int trace_kho_notifier(struct notifier_block *self,
> +			      unsigned long cmd,
> +			      void *v)
> +{
> +	const char compatible[] = "ftrace-v1";
> +	void *fdt = v;
> +	int err = 0;
> +
> +	switch (cmd) {
> +	case KEXEC_KHO_ABORT:
> +		if (trace_in_kho)
> +			mutex_unlock(&trace_types_lock);
> +		trace_in_kho = false;
> +		return NOTIFY_DONE;
> +	case KEXEC_KHO_DUMP:
> +		/* Handled below */
> +		break;
> +	default:
> +		return NOTIFY_BAD;
> +	}
> +
> +	if (unlikely(tracing_disabled))
> +		return NOTIFY_DONE;
> +
> +	err |= fdt_begin_node(fdt, "ftrace");
> +	err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible));
> +	err |= fdt_end_node(fdt);
> +
> +	if (!err) {
> +		/* Hold all future allocations */
> +		mutex_lock(&trace_types_lock);

Say I do "echo 1 | tee /sys/kernel/kho/active". Then the lock is held by
tee, which exits. Then later I do "echo 0 | tee
/sys/kernel/kho/active". This time another tee task unlocks the lock. So
it is not being unlocked by the same task that locked it. The comment
above mutex_lock() definition says:

    The mutex must later on be released by the same task that acquired
    it. Recursive locking is not allowed. The task may not exit without
    first unlocking the mutex.

I tested your code and it happens to work because the unlock always
happened to take the fast path which does not sanity-check the owner.
Still, this is not the correct thing to do.

> +		trace_in_kho = true;
> +	}
> +
> +	return err ? NOTIFY_BAD : NOTIFY_DONE;
> +}
> +
> +static struct notifier_block trace_kho_nb = {
> +	.notifier_call = trace_kho_notifier,
> +};
> +
>  void __init trace_init(void)
>  {
>  	trace_event_init();
>
>  	if (boot_instance_index)
>  		enable_instances();
> +
> +	if (IS_ENABLED(CONFIG_FTRACE_KHO))
> +		register_kho_notifier(&trace_kho_nb);
>  }
>
>  __init static void clear_boot_tracer(void)

--
Regards,
Pratyush Yadav



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879
  

Patch

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0defe156b57..9a0d96975c9c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -32,6 +32,7 @@ 
 #include <linux/percpu.h>
 #include <linux/splice.h>
 #include <linux/kdebug.h>
+#include <linux/kexec.h>
 #include <linux/string.h>
 #include <linux/mount.h>
 #include <linux/rwsem.h>
@@ -866,6 +867,8 @@  static struct tracer		*trace_types __read_mostly;
  */
 DEFINE_MUTEX(trace_types_lock);
 
+static bool trace_in_kho;
+
 /*
  * serialize the access of the ring buffer
  *
@@ -10574,12 +10577,56 @@  void __init early_trace_init(void)
 	init_events();
 }
 
+static int trace_kho_notifier(struct notifier_block *self,
+			      unsigned long cmd,
+			      void *v)
+{
+	const char compatible[] = "ftrace-v1";
+	void *fdt = v;
+	int err = 0;
+
+	switch (cmd) {
+	case KEXEC_KHO_ABORT:
+		if (trace_in_kho)
+			mutex_unlock(&trace_types_lock);
+		trace_in_kho = false;
+		return NOTIFY_DONE;
+	case KEXEC_KHO_DUMP:
+		/* Handled below */
+		break;
+	default:
+		return NOTIFY_BAD;
+	}
+
+	if (unlikely(tracing_disabled))
+		return NOTIFY_DONE;
+
+	err |= fdt_begin_node(fdt, "ftrace");
+	err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible));
+	err |= fdt_end_node(fdt);
+
+	if (!err) {
+		/* Hold all future allocations */
+		mutex_lock(&trace_types_lock);
+		trace_in_kho = true;
+	}
+
+	return err ? NOTIFY_BAD : NOTIFY_DONE;
+}
+
+static struct notifier_block trace_kho_nb = {
+	.notifier_call = trace_kho_notifier,
+};
+
 void __init trace_init(void)
 {
 	trace_event_init();
 
 	if (boot_instance_index)
 		enable_instances();
+
+	if (IS_ENABLED(CONFIG_FTRACE_KHO))
+		register_kho_notifier(&trace_kho_nb);
 }
 
 __init static void clear_boot_tracer(void)