Hi,
I found a strange behavior: if we set two or more probes on the same function,
each probe's callback is called twice or more. Also, the stacktrace fails.
I found that the root cause is here:
On Wed, 8 Nov 2023 23:26:42 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> @@ -910,9 +917,10 @@ int register_ftrace_graph(struct fgraph_ops *gops)
> */
> ftrace_graph_return = return_run;
> ftrace_graph_entry = entry_run;
> -
> - ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
> + command = FTRACE_START_FUNC_RET;
> }
> +
> + ret = ftrace_startup(&gops->ops, command);
So this patch registers a separate ftrace_ops with ftrace for each fgraph_ops.
This means that ftrace_graph_func() will be called twice or more
for the same function.
So should I call ftrace_startup() only once, when the first fgraph_ops
is registered?
No, that is not enough. Each fgraph_ops can actually have different filters.
We need to define a shared filter, merge each new filter into it, and
use it. We also need to update it when an fgraph_ops is unregistered.
Is there any function which makes a new filter from two (or more) filters?
Or, maybe we can make the common callback find the previous ret entry on
the ret_stack and reuse it. (In this case we don't need to loop over each
fgraph_array entry.)
Thank you,
On Thu, 9 Nov 2023 21:18:48 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:
> On Fri, 10 Nov 2023 10:51:54 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
>
> > So this patch registers ftrace_ops for each fgraph_ops to ftrace.
> > This means that the ftrace_graph_func() will be called twice or more
> > on the same function.
> > Thus should I call ftrace_startup() once when the first fgraph_ops
> > is registered?
> > No, it's not enough. Actually each fgraph_ops can have different filters.
> > We need to define a shared filter and combine new filters to one and
> > use it. We also need to do it when a fgraph is unregistered.
> >
> > Is there any function which makes a new filter from two (or more) filters?
>
> So I'm guessing that we need to have a fgraph_set_filter*() operations?
>
> When one gets added, it needs to update the ftrace_ops to include the added
> functions. Or we need to have a way to create a new hash from all the
> registered fgraph_ops, and have that for the ftrace_ops. Then when it gets
> called, if it has more than one registered function, it needs to iterate
> over the list?
Yes, that is one option: update a global common hash and introduce a new
common ftrace function that runs function_graph_enter().
Alternatively, I think we could keep the current one but iterate over the
ftrace_ops and call function_graph_enter() with each ftrace_ops. Then we can
get the appropriate fgraph_ops. The ftrace return-trace push can be skipped
if ret == return_to_handler.
(Maybe this is better, since it reuses ftrace.)
Thank you,
>
> -- Steve
>
>
> >
> > Or, maybe we can make the common callback to find the previous ret entry on
> > the ret_stack and reuse it. (In this case we don't need loop on each
> > fgraph_array entry)
>
@@ -1069,6 +1069,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph
struct fgraph_ops {
trace_func_graph_ent_t entryfunc;
trace_func_graph_ret_t retfunc;
+ struct ftrace_ops ops; /* for the hash lists */
void *private;
};
@@ -17,14 +17,6 @@
#include "ftrace_internal.h"
#include "trace.h"
-#ifdef CONFIG_DYNAMIC_FTRACE
-#define ASSIGN_OPS_HASH(opsname, val) \
- .func_hash = val, \
- .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
-#else
-#define ASSIGN_OPS_HASH(opsname, val)
-#endif
-
#define FGRAPH_RET_SIZE sizeof(struct ftrace_ret_stack)
#define FGRAPH_RET_INDEX (FGRAPH_RET_SIZE / sizeof(long))
@@ -337,9 +329,6 @@ int function_graph_enter(unsigned long ret, unsigned long func,
return -EBUSY;
#endif
- if (!ftrace_ops_test(&global_ops, func, NULL))
- return -EBUSY;
-
trace.func = func;
trace.depth = ++current->curr_ret_depth;
@@ -360,7 +349,8 @@ int function_graph_enter(unsigned long ret, unsigned long func,
atomic_inc(&current->trace_overrun);
break;
}
- if (fgraph_array[i]->entryfunc(&trace, fgraph_array[i])) {
+ if (ftrace_ops_test(&gops->ops, func, NULL) &&
+ gops->entryfunc(&trace, gops)) {
offset = current->curr_ret_stack;
/* Check the top level stored word */
type = get_fgraph_type(current, offset - 1);
@@ -655,17 +645,25 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
}
#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
-static struct ftrace_ops graph_ops = {
- .func = ftrace_graph_func,
- .flags = FTRACE_OPS_FL_INITIALIZED |
- FTRACE_OPS_FL_PID |
- FTRACE_OPS_GRAPH_STUB,
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+ struct ftrace_ops *src_ops)
+{
+ dst_ops->func = ftrace_graph_func;
+ dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB;
+
#ifdef FTRACE_GRAPH_TRAMP_ADDR
- .trampoline = FTRACE_GRAPH_TRAMP_ADDR,
+ dst_ops->trampoline = FTRACE_GRAPH_TRAMP_ADDR;
/* trampoline_size is only needed for dynamically allocated tramps */
#endif
- ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
-};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+ if (src_ops) {
+ dst_ops->func_hash = &src_ops->local_hash;
+ mutex_init(&dst_ops->local_hash.regex_lock);
+ dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+ }
+#endif
+}
void ftrace_graph_sleep_time_control(bool enable)
{
@@ -870,11 +868,20 @@ static int start_graph_tracing(void)
int register_ftrace_graph(struct fgraph_ops *gops)
{
+ int command = 0;
int ret = 0;
int i;
mutex_lock(&ftrace_lock);
+ if (!gops->ops.func) {
+ gops->ops.flags |= FTRACE_OPS_GRAPH_STUB;
+ gops->ops.func = ftrace_graph_func;
+#ifdef FTRACE_GRAPH_TRAMP_ADDR
+ gops->ops.trampoline = FTRACE_GRAPH_TRAMP_ADDR;
+#endif
+ }
+
if (!fgraph_array[0]) {
/* The array must always have real data on it */
for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
@@ -910,9 +917,10 @@ int register_ftrace_graph(struct fgraph_ops *gops)
*/
ftrace_graph_return = return_run;
ftrace_graph_entry = entry_run;
-
- ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
+ command = FTRACE_START_FUNC_RET;
}
+
+ ret = ftrace_startup(&gops->ops, command);
out:
mutex_unlock(&ftrace_lock);
return ret;
@@ -920,6 +928,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
void unregister_ftrace_graph(struct fgraph_ops *gops)
{
+ int command = 0;
int i;
mutex_lock(&ftrace_lock);
@@ -942,10 +951,15 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
}
ftrace_graph_active--;
+
+ if (!ftrace_graph_active)
+ command = FTRACE_STOP_FUNC_RET;
+
+ ftrace_shutdown(&gops->ops, command);
+
if (!ftrace_graph_active) {
ftrace_graph_return = ftrace_stub_graph;
ftrace_graph_entry = ftrace_graph_entry_stub;
- ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
unregister_pm_notifier(&ftrace_suspend_notifier);
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
}
@@ -3050,6 +3050,8 @@ int ftrace_startup(struct ftrace_ops *ops, int command)
if (unlikely(ftrace_disabled))
return -ENODEV;
+ ftrace_ops_init(ops);
+
ret = __register_ftrace_function(ops);
if (ret)
return ret;
@@ -7319,7 +7321,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr)
tr->ops = &global_ops;
tr->ops->private = tr;
ftrace_init_trace_array(tr);
- init_array_fgraph_ops(tr);
+ init_array_fgraph_ops(tr, tr->ops);
}
void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
@@ -8051,7 +8053,7 @@ static int register_ftrace_function_nolock(struct ftrace_ops *ops)
*/
int register_ftrace_function(struct ftrace_ops *ops)
{
- int ret;
+ int ret = -1;
lock_direct_mutex();
ret = prepare_direct_functions_for_ipmodify(ops);
@@ -885,8 +885,8 @@ extern int __trace_graph_entry(struct trace_array *tr,
extern void __trace_graph_return(struct trace_array *tr,
struct ftrace_graph_ret *trace,
unsigned int trace_ctx);
-extern void init_array_fgraph_ops(struct trace_array *tr);
-extern int allocate_fgraph_ops(struct trace_array *tr);
+extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
+extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
extern void free_fgraph_ops(struct trace_array *tr);
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -969,6 +969,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr)
preempt_enable_notrace();
return ret;
}
+
#else
static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
{
@@ -994,18 +995,19 @@ static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
(fgraph_max_depth && trace->depth >= fgraph_max_depth);
}
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+ struct ftrace_ops *src_ops);
+
#else /* CONFIG_FUNCTION_GRAPH_TRACER */
static inline enum print_line_t
print_graph_function_flags(struct trace_iterator *iter, u32 flags)
{
return TRACE_TYPE_UNHANDLED;
}
-static inline void init_array_fgraph_ops(struct trace_array *tr) { }
-static inline int allocate_fgraph_ops(struct trace_array *tr)
-{
- return 0;
-}
static inline void free_fgraph_ops(struct trace_array *tr) { }
+/* ftrace_ops may not be defined */
+#define init_array_fgraph_ops(tr, ops) do { } while (0)
+#define allocate_fgraph_ops(tr, ops) ({ 0; })
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
extern struct list_head ftrace_pids;
@@ -91,7 +91,7 @@ int ftrace_create_function_files(struct trace_array *tr,
if (!tr->ops)
return -EINVAL;
- ret = allocate_fgraph_ops(tr);
+ ret = allocate_fgraph_ops(tr, tr->ops);
if (ret) {
kfree(tr->ops);
return ret;
@@ -288,7 +288,7 @@ static struct fgraph_ops funcgraph_ops = {
.retfunc = &trace_graph_return,
};
-int allocate_fgraph_ops(struct trace_array *tr)
+int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
{
struct fgraph_ops *gops;
@@ -301,6 +301,9 @@ int allocate_fgraph_ops(struct trace_array *tr)
tr->gops = gops;
gops->private = tr;
+
+ fgraph_init_ops(&gops->ops, ops);
+
return 0;
}
@@ -309,10 +312,11 @@ void free_fgraph_ops(struct trace_array *tr)
kfree(tr->gops);
}
-__init void init_array_fgraph_ops(struct trace_array *tr)
+__init void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
{
tr->gops = &funcgraph_ops;
funcgraph_ops.private = tr;
+ fgraph_init_ops(&tr->gops->ops, ops);
}
static int graph_trace_init(struct trace_array *tr)