[2/2] clk: Add trace events for rate requests
Commit Message
It is currently fairly difficult to follow what clk_rate_request are
issued, and how they have been modified once done.
Indeed, there's multiple paths that can be taken, some functions are
recursive and will just forward the request to its parent, etc.
Adding a lot of debug prints is just not very convenient, so let's add
trace events for the clock requests, one before they are submitted and
one after they are returned.
That way we can simply toggle the tracing on without modifying the
kernel code and without affecting performances or the kernel logs too
much.
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
---
drivers/clk/clk.c | 31 +++++++++++++++++++++++++++++++
include/trace/events/clk.h | 43 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 74 insertions(+)
Comments
On Tue, 18 Oct 2022 15:56:42 +0200
Maxime Ripard <maxime@cerno.tech> wrote:
> It is currently fairly difficult to follow what clk_rate_request are
> issued, and how they have been modified once done.
>
> Indeed, there's multiple paths that can be taken, some functions are
> recursive and will just forward the request to its parent, etc.
>
> Adding a lot of debug prints is just not very convenient, so let's add
> trace events for the clock requests, one before they are submitted and
> one after they are returned.
>
> That way we can simply toggle the tracing on without modifying the
> kernel code and without affecting performances or the kernel logs too
> much.
>
> Signed-off-by: Maxime Ripard <maxime@cerno.tech>
From a tracing point of view:
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
-- Steve
> ---
> drivers/clk/clk.c | 31 +++++++++++++++++++++++++++++++
> include/trace/events/clk.h | 43 +++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 74 insertions(+)
On Tue, 18 Oct 2022 15:56:42 +0200
Maxime Ripard <maxime@cerno.tech> wrote:
I know I reviewed this but looking at this again, I noticed:
> +DECLARE_EVENT_CLASS(clk_rate_request,
> +
> + TP_PROTO(struct clk_rate_request *req),
> +
> + TP_ARGS(req),
> +
> + TP_STRUCT__entry(
> + __string( name, req->core ? req->core->name : "none")
> + __field(unsigned long, min )
> + __field(unsigned long, max )
> + __string( pname, req->best_parent_hw ? clk_hw_get_name(req->best_parent_hw) : "none" )
It may be best to move the two __string() declarations together. The reason
is that dynamic strings (which __string() is) uses 4 bytes embedded in the
first part of the event. Two bytes for offset, where the dynamic string
exists, and two bytes for the strings length.
On 64 bit machines the above has:
__string() 4 bytes
__field(unsigned long) 8 bytes
__field(unsigned long) 8 bytes
__string() 4 bytes
and then another unsigned long field below, which is another 8 bytes.
> + __field(unsigned long, prate )
> + ),
> +
> +
As compilers tend to use word alignment, the above turns into:
__string() 4 bytes
__PADDING__ 4 bytes
__field(unsigned long) 8 bytes
__field(unsigned long) 8 bytes
__string() 4 bytes
__PADDING__ 4 bytes
__field(unsigned long) 8 bytes
Where there will be 8 bytes of padding in that event that wastes precious
ring buffer space. By changing the event to:
TP_STRUCT__entry(
__string( name, req->core ? req->core->name : "none")
__string( pname, req->best_parent_hw ? clk_hw_get_name(req->best_parent_hw) : "none" )
__field(unsigned long, min )
__field(unsigned long, max )
__field(unsigned long, prate )
),
It will turn the size into:
__string() 4 bytes
__string() 4 bytes
__field(unsigned long) 8 bytes
__field(unsigned long) 8 bytes
__field(unsigned long) 8 bytes
With no padding and no wasted space.
I would suggest changing this.
-- Steve
@@ -603,10 +603,15 @@ int clk_mux_determine_rate_flags(struct clk_hw *hw,
}
clk_core_forward_rate_req(core, req, parent, &parent_req, req->rate);
+
+ trace_clk_rate_request_start(&parent_req);
+
ret = clk_core_round_rate_nolock(parent, &parent_req);
if (ret)
return ret;
+ trace_clk_rate_request_done(&parent_req);
+
best = parent_req.rate;
} else if (parent) {
best = clk_core_get_rate_nolock(parent);
@@ -630,10 +635,15 @@ int clk_mux_determine_rate_flags(struct clk_hw *hw,
struct clk_rate_request parent_req;
clk_core_forward_rate_req(core, req, parent, &parent_req, req->rate);
+
+ trace_clk_rate_request_start(&parent_req);
+
ret = clk_core_round_rate_nolock(parent, &parent_req);
if (ret)
continue;
+ trace_clk_rate_request_done(&parent_req);
+
parent_rate = parent_req.rate;
} else {
parent_rate = clk_core_get_rate_nolock(parent);
@@ -1551,10 +1561,15 @@ static int clk_core_round_rate_nolock(struct clk_core *core,
struct clk_rate_request parent_req;
clk_core_forward_rate_req(core, req, core->parent, &parent_req, req->rate);
+
+ trace_clk_rate_request_start(&parent_req);
+
ret = clk_core_round_rate_nolock(core->parent, &parent_req);
if (ret)
return ret;
+ trace_clk_rate_request_done(&parent_req);
+
req->best_parent_rate = parent_req.rate;
req->rate = parent_req.rate;
@@ -1605,10 +1620,14 @@ unsigned long clk_hw_round_rate(struct clk_hw *hw, unsigned long rate)
clk_core_init_rate_req(hw->core, &req, rate);
+ trace_clk_rate_request_start(&req);
+
ret = clk_core_round_rate_nolock(hw->core, &req);
if (ret)
return 0;
+ trace_clk_rate_request_done(&req);
+
return req.rate;
}
EXPORT_SYMBOL_GPL(clk_hw_round_rate);
@@ -1637,8 +1656,12 @@ long clk_round_rate(struct clk *clk, unsigned long rate)
clk_core_init_rate_req(clk->core, &req, rate);
+ trace_clk_rate_request_start(&req);
+
ret = clk_core_round_rate_nolock(clk->core, &req);
+ trace_clk_rate_request_done(&req);
+
if (clk->exclusive_count)
clk_core_rate_protect(clk->core);
@@ -2130,10 +2153,14 @@ static struct clk_core *clk_calc_new_rates(struct clk_core *core,
clk_core_init_rate_req(core, &req, rate);
+ trace_clk_rate_request_start(&req);
+
ret = clk_core_determine_round_nolock(core, &req);
if (ret < 0)
return NULL;
+ trace_clk_rate_request_done(&req);
+
best_parent_rate = req.best_parent_rate;
new_rate = req.rate;
parent = req.best_parent_hw ? req.best_parent_hw->core : NULL;
@@ -2329,8 +2356,12 @@ static unsigned long clk_core_req_round_rate_nolock(struct clk_core *core,
clk_core_init_rate_req(core, &req, req_rate);
+ trace_clk_rate_request_start(&req);
+
ret = clk_core_round_rate_nolock(core, &req);
+ trace_clk_rate_request_done(&req);
+
/* restore the protection */
clk_core_rate_restore_protect(core, cnt);
@@ -264,6 +264,49 @@ DEFINE_EVENT(clk_duty_cycle, clk_set_duty_cycle_complete,
TP_ARGS(core, duty)
);
+DECLARE_EVENT_CLASS(clk_rate_request,
+
+ TP_PROTO(struct clk_rate_request *req),
+
+ TP_ARGS(req),
+
+ TP_STRUCT__entry(
+ __string( name, req->core ? req->core->name : "none")
+ __field(unsigned long, min )
+ __field(unsigned long, max )
+ __string( pname, req->best_parent_hw ? clk_hw_get_name(req->best_parent_hw) : "none" )
+ __field(unsigned long, prate )
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, req->core ? req->core->name : "none");
+ __assign_str(pname, req->best_parent_hw ? clk_hw_get_name(req->best_parent_hw) : "none");
+ __entry->min = req->min_rate;
+ __entry->max = req->max_rate;
+ __entry->prate = req->best_parent_rate;
+ ),
+
+ TP_printk("%s min %lu max %lu, parent %s (%lu)", __get_str(name),
+ (unsigned long)__entry->min,
+ (unsigned long)__entry->max,
+ __get_str(pname),
+ (unsigned long)__entry->prate)
+);
+
+DEFINE_EVENT(clk_rate_request, clk_rate_request_start,
+
+ TP_PROTO(struct clk_rate_request *req),
+
+ TP_ARGS(req)
+);
+
+DEFINE_EVENT(clk_rate_request, clk_rate_request_done,
+
+ TP_PROTO(struct clk_rate_request *req),
+
+ TP_ARGS(req)
+);
+
#endif /* _TRACE_CLK_H */
/* This part must be outside protection */