@@ -84,20 +84,23 @@ void ring_buffer_discard_commit(struct trace_buffer *buffer,
/*
* size is in bytes for each per CPU buffer.
*/
-struct trace_buffer *
-__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key);
+/*
+ * tr_off is the device tree offset of the trace instance's KHO node, used
+ * as the parent when looking up the per-CPU "cpu<hex>" subnodes. Pass 0 to
+ * allocate a buffer without reviving any handed-over trace data.
+ */
+struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+ struct lock_class_key *key,
+ int tr_off);
/*
* Because the ring buffer is generic, if other users of the ring buffer get
* traced by ftrace, it can produce lockdep warnings. We need to keep each
* ring buffer's lock class separate.
*/
-#define ring_buffer_alloc(size, flags) \
-({ \
- static struct lock_class_key __key; \
- __ring_buffer_alloc((size), (flags), &__key); \
+/*
+ * ring_buffer_alloc_kho() additionally takes tr_off, the device tree offset
+ * of the trace instance's KHO node (0 when there is nothing to revive).
+ * Every argument is parenthesized so that any expression can be passed.
+ */
+#define ring_buffer_alloc_kho(size, flags, tr_off)			\
+({									\
+	static struct lock_class_key __key;				\
+	__ring_buffer_alloc((size), (flags), &__key, (tr_off));	\
})
+/* Plain allocation: a tr_off of 0 means no handed-over data is looked up. */
+#define ring_buffer_alloc(size, flags) ring_buffer_alloc_kho(size, flags, 0)
+
int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
__poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
struct file *filp, poll_table *poll_table, int full);
@@ -558,6 +558,7 @@ struct trace_buffer {
struct rb_irq_work irq_work;
bool time_stamp_abs;
+ int tr_off;
};
struct ring_buffer_iter {
@@ -574,6 +575,15 @@ struct ring_buffer_iter {
int missed_events;
};
+/*
+ * Pre-KHO memory recorded for one CPU, as filled in by rb_kho_read_cpu()
+ * from the "mem" property of that CPU's device tree node.
+ */
+struct rb_kho_cpu {
+	/* Entries alternate: bpage 0, page 0, bpage 1, page 1, ... */
+ const struct kho_mem *mem;
+	/* Number of kho_mem entries that @mem points to */
+ uint32_t nr_mems;
+};
+
+/*
+ * Forward declarations: both helpers are defined after
+ * trace_rb_cpu_prepare() but are already needed by __ring_buffer_alloc().
+ */
+static int rb_kho_replace_buffers(struct ring_buffer_per_cpu *cpu_buffer,
+ struct rb_kho_cpu *kho);
+static int rb_kho_read_cpu(int tr_off, int cpu, struct rb_kho_cpu *kho);
+
#ifdef RB_TIME_32
/*
@@ -1768,12 +1778,15 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer)
* drop data when the tail hits the head.
*/
struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
- struct lock_class_key *key)
+ struct lock_class_key *key,
+ int tr_off)
{
+ int cpu = raw_smp_processor_id();
+ struct rb_kho_cpu kho = {};
struct trace_buffer *buffer;
+ bool use_kho = false;
long nr_pages;
int bsize;
- int cpu;
int ret;
/* keep it in its own cache line */
@@ -1786,9 +1799,16 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
goto fail_free_buffer;
nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
+ if (!rb_kho_read_cpu(tr_off, cpu, &kho) && kho.nr_mems > 4) {
+ nr_pages = kho.nr_mems / 2;
+ use_kho = true;
+ pr_debug("Using kho on CPU [%03d]", cpu);
+ }
+
buffer->flags = flags;
buffer->clock = trace_clock_local;
buffer->reader_lock_key = key;
+ buffer->tr_off = tr_off;
init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
init_waitqueue_head(&buffer->irq_work.waiters);
@@ -1805,12 +1825,14 @@ struct trace_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
if (!buffer->buffers)
goto fail_free_cpumask;
- cpu = raw_smp_processor_id();
cpumask_set_cpu(cpu, buffer->cpumask);
buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
if (!buffer->buffers[cpu])
goto fail_free_buffers;
+ if (use_kho && rb_kho_replace_buffers(buffer->buffers[cpu], &kho))
+ pr_warn("Could not revive all previous trace data");
+
ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node);
if (ret < 0)
goto fail_free_buffers;
@@ -5824,7 +5846,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_page);
*/
int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
{
+ struct rb_kho_cpu kho = {};
struct trace_buffer *buffer;
+ bool use_kho = false;
long nr_pages_same;
int cpu_i;
unsigned long nr_pages;
@@ -5848,6 +5872,12 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
/* allocate minimum pages, user can later expand it */
if (!nr_pages_same)
nr_pages = 2;
+
+ if (!rb_kho_read_cpu(buffer->tr_off, cpu, &kho) && kho.nr_mems > 4) {
+ nr_pages = kho.nr_mems / 2;
+ use_kho = true;
+ }
+
buffer->buffers[cpu] =
rb_allocate_cpu_buffer(buffer, nr_pages, cpu);
if (!buffer->buffers[cpu]) {
@@ -5855,13 +5885,143 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node)
cpu);
return -ENOMEM;
}
+
+ if (use_kho && rb_kho_replace_buffers(buffer->buffers[cpu], &kho))
+ pr_warn("Could not revive all previous trace data");
+
smp_wmb();
cpumask_set_cpu(cpu, buffer->cpumask);
return 0;
}
-#ifdef CONFIG_FTRACE_KHO
-static int rb_kho_write_cpu(void *fdt, struct trace_buffer *buffer, int cpu)
+/*
+ * rb_kho_replace_buffers - revive pre-KHO trace pages in a fresh CPU buffer
+ * @cpu_buffer: newly allocated per-CPU buffer whose pages get replaced
+ * @kho: "bpage, page" descriptor pairs obtained from rb_kho_read_cpu()
+ *
+ * Walks cpu_buffer->pages exactly once. Entry N of the list is paired with
+ * kho->mem[2 * N] (the old buffer_page descriptor) and kho->mem[2 * N + 1]
+ * (the old data page):
+ *  - if the old descriptor carried the head flag, the head/tail/commit
+ *    pointers are moved so that the new list has its head in the same place;
+ *  - if the old page holds data, it is claimed and swapped in for the freshly
+ *    allocated page, otherwise it is returned;
+ *  - the memory of the old descriptor itself is always returned.
+ *
+ * A data page that cannot be claimed is skipped (the fresh, empty page is
+ * kept) and the walk continues, so every remaining descriptor is still
+ * handed back.
+ *
+ * Returns 0 on success, -EINVAL if KHO is not built in or the descriptor
+ * count does not match the buffer size, -ENOMEM if at least one data page
+ * could not be claimed.
+ */
+static int rb_kho_replace_buffers(struct ring_buffer_per_cpu *cpu_buffer,
+				  struct rb_kho_cpu *kho)
+{
+	const uint64_t rb_page_head = 1;
+	struct list_head *head;
+	struct list_head *tmp;
+	int err = 0;
+	int i = 0;
+
+	if (!IS_ENABLED(CONFIG_FTRACE_KHO))
+		return -EINVAL;
+
+	if (kho->nr_mems != cpu_buffer->nr_pages * 2)
+		return -EINVAL;
+
+	/* The walk never changes cpu_buffer->pages, so resolve the start once */
+	head = rb_list_head(cpu_buffer->pages);
+	tmp = head;
+
+	do {
+		struct buffer_page *bpage = list_entry(tmp, struct buffer_page, list);
+		const struct kho_mem *mem_bpage;
+		const struct kho_mem *mem_page;
+		struct buffer_page *old_bpage;
+		void *old_page;
+
+		/* Never index past the descriptors, even if the list is longer */
+		if ((uint32_t)i + 2 > kho->nr_mems) {
+			err = -EINVAL;
+			break;
+		}
+
+		mem_bpage = &kho->mem[i++];
+		mem_page = &kho->mem[i++];
+		old_bpage = __va(mem_bpage->addr);
+
+		if ((ulong)old_bpage->list.next & rb_page_head) {
+			struct list_head *new_lhead;
+			struct buffer_page *new_head;
+
+			new_lhead = rb_list_head(bpage->list.next);
+			new_head = list_entry(new_lhead, struct buffer_page, list);
+
+			/* Assume the buffer is completely full */
+			cpu_buffer->tail_page = bpage;
+			cpu_buffer->commit_page = bpage;
+			/* Set the head pointers to what they were before */
+			cpu_buffer->head_page->list.prev->next = (struct list_head *)
+				((ulong)cpu_buffer->head_page->list.prev->next & ~rb_page_head);
+			cpu_buffer->head_page = new_head;
+			bpage->list.next = (struct list_head *)((ulong)new_lhead | rb_page_head);
+		}
+
+		if (rb_page_entries(old_bpage) || rb_page_write(old_bpage)) {
+			/*
+			 * The pre-kho page contains trace data. Claim it first
+			 * and only then give up the freshly allocated page, so
+			 * that a failed claim leaves bpage with a valid page.
+			 *
+			 * NOTE(review): assumes kho_claim_mem() returns NULL
+			 * on failure - confirm against its definition.
+			 */
+			old_page = kho_claim_mem(mem_page);
+			if (old_page) {
+				free_page((ulong)bpage->page);
+				bpage->page = old_page;
+
+				bpage->write = old_bpage->write;
+				bpage->entries = old_bpage->entries;
+				bpage->real_end = old_bpage->real_end;
+
+				local_inc(&cpu_buffer->pages_touched);
+			} else {
+				/* Report the loss, but keep reviving the rest */
+				err = -ENOMEM;
+			}
+		} else {
+			kho_return_mem(mem_page);
+		}
+
+		kho_return_mem(mem_bpage);
+
+		/* The next pointer may carry flag bits; strip them */
+		tmp = rb_list_head(tmp->next);
+	} while (tmp != head);
+
+	return err;
+}
+
+/*
+ * rb_kho_read_cpu - look up the handed-over memory list of one CPU
+ * @tr_off: device tree offset of the trace instance node; 0 means there is
+ *          no KHO data for this buffer
+ * @cpu: CPU whose "cpu<hex>" subnode is looked up below @tr_off
+ * @kho: on success, set to the node's "mem" property and its entry count
+ *
+ * kho->mem points straight into the device tree blob; nothing is copied.
+ * @kho is only written on success.
+ *
+ * Returns 0 on success, -ENOENT if the CPU has no subnode, -EINVAL if KHO is
+ * not available or the node is malformed (wrong compatible, missing "mem",
+ * a "mem" length that is not a whole number of entries, or an odd number of
+ * entries).
+ */
+static int rb_kho_read_cpu(int tr_off, int cpu, struct rb_kho_cpu *kho)
+{
+	const void *fdt = kho_get_fdt();
+	const struct kho_mem *mem;
+	/* "cpu" + at most 8 hex digits + NUL fits with room to spare */
+	char path[16];
+	uint32_t nr_mems;
+	int mem_len;
+	int off;
+
+	if (!IS_ENABLED(CONFIG_FTRACE_KHO))
+		return -EINVAL;
+
+	if (!tr_off || !fdt || !kho)
+		return -EINVAL;
+
+	snprintf(path, sizeof(path), "cpu%x", cpu);
+
+	pr_debug("Trying to revive trace cpu '%s'", path);
+
+	off = fdt_subnode_offset(fdt, tr_off, path);
+	if (off < 0) {
+		pr_debug("Could not find '%s' in DT", path);
+		return -ENOENT;
+	}
+
+	if (fdt_node_check_compatible(fdt, off, "ftrace,cpu-v1")) {
+		pr_warn("Node '%s' has invalid compatible", path);
+		return -EINVAL;
+	}
+
+	/* The property comes from another kernel: validate its size too */
+	mem = fdt_getprop(fdt, off, "mem", &mem_len);
+	if (!mem || mem_len % sizeof(*mem)) {
+		pr_warn("Node '%s' has invalid mem property", path);
+		return -EINVAL;
+	}
+
+	nr_mems = mem_len / sizeof(*mem);
+
+	/* Should follow "bpage 0, page 0, bpage 1, page 1, ..." pattern */
+	if (nr_mems & 1)
+		return -EINVAL;
+
+	kho->mem = mem;
+	kho->nr_mems = nr_mems;
+
+	return 0;
+}
+
+static int __maybe_unused rb_kho_write_cpu(void *fdt, struct trace_buffer *buffer, int cpu)
{
int i = 0;
int err = 0;
@@ -5921,6 +6081,7 @@ static int rb_kho_write_cpu(void *fdt, struct trace_buffer *buffer, int cpu)
return err;
}
+#ifdef CONFIG_FTRACE_KHO
int ring_buffer_kho_write(void *fdt, struct trace_buffer *buffer)
{
int err, i;
@@ -9362,16 +9362,46 @@ static struct dentry *trace_instance_dir;
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
+/*
+ * trace_kho_off_tr - find the KHO device tree node of a trace instance
+ * @tr: trace array to look up; an array without a name is looked up as
+ *      "global_trace"
+ *
+ * Returns the device tree offset of "/ftrace/<name>", or 0 when there is
+ * nothing to revive: KHO is not built in, no device tree was handed over,
+ * the node does not exist, or the path could not be allocated.
+ *
+ * The result is passed on as a device tree offset where 0 means "no KHO",
+ * so failures are reported as 0 and never as a negative errno.
+ */
+static int trace_kho_off_tr(struct trace_array *tr)
+{
+	const char *name = tr->name ? tr->name : "global_trace";
+	const void *fdt = kho_get_fdt();
+	char *path;
+	int off;
+
+	if (!IS_ENABLED(CONFIG_FTRACE_KHO))
+		return 0;
+
+	if (!fdt)
+		return 0;
+
+	path = kasprintf(GFP_KERNEL, "/ftrace/%s", name);
+	if (!path)
+		return 0;	/* reviving is best effort: fall back to a fresh buffer */
+
+	pr_debug("Trying to revive trace buffer '%s'", path);
+
+	off = fdt_path_offset(fdt, path);
+	if (off < 0) {
+		pr_debug("Could not find '%s' in DT", path);
+		off = 0;
+	}
+
+	kfree(path);
+	return off;
+}
+
static int
allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
{
+ int tr_off = trace_kho_off_tr(tr);
enum ring_buffer_flags rb_flags;
rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
buf->tr = tr;
- buf->buffer = ring_buffer_alloc(size, rb_flags);
+ buf->buffer = ring_buffer_alloc_kho(size, rb_flags, tr_off);
if (!buf->buffer)
return -ENOMEM;