[2/2] clk: Add trace events for rate requests

Message ID 20221018-clk-rate-request-tracing-v1-2-6f3aa0b0b9de@cerno.tech
State New
Series clk: Rate Request Tracing

Commit Message

Maxime Ripard Oct. 18, 2022, 1:56 p.m. UTC
  It is currently fairly difficult to follow which clk_rate_request structures are
issued, and how they have been modified by the time they are returned.

Indeed, there are multiple paths that can be taken, some functions are
recursive and simply forward the request to their parent, and so on.

Adding a lot of debug prints is just not very convenient, so let's add
trace events for the clock requests, one before they are submitted and
one after they are returned.

That way we can simply toggle the tracing on without modifying the
kernel code and without affecting performance or the kernel logs too
much.
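
For illustration, each call site ends up with the following shape (a
sketch of the pattern used in the hunks below; clk_core_round_rate_nolock
stands in for whichever rounding helper a given path calls):

	struct clk_rate_request req;
	int ret;

	clk_core_init_rate_req(core, &req, rate);

	trace_clk_rate_request_start(&req);	/* request as submitted */

	ret = clk_core_round_rate_nolock(core, &req);
	if (ret)
		return ret;

	trace_clk_rate_request_done(&req);	/* request as modified by the callee */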

Signed-off-by: Maxime Ripard <maxime@cerno.tech>
---
 drivers/clk/clk.c          | 31 +++++++++++++++++++++++++++++++
 include/trace/events/clk.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)
  

Comments

Steven Rostedt Oct. 18, 2022, 2:24 p.m. UTC | #1
On Tue, 18 Oct 2022 15:56:42 +0200
Maxime Ripard <maxime@cerno.tech> wrote:

> It is currently fairly difficult to follow which clk_rate_request structures are
> issued, and how they have been modified by the time they are returned.
> 
> Indeed, there are multiple paths that can be taken, some functions are
> recursive and simply forward the request to their parent, and so on.
> 
> Adding a lot of debug prints is just not very convenient, so let's add
> trace events for the clock requests, one before they are submitted and
> one after they are returned.
> 
> That way we can simply toggle the tracing on without modifying the
> kernel code and without affecting performance or the kernel logs too
> much.
> 
> Signed-off-by: Maxime Ripard <maxime@cerno.tech>

From a tracing point of view:

Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>

-- Steve

> ---
>  drivers/clk/clk.c          | 31 +++++++++++++++++++++++++++++++
>  include/trace/events/clk.h | 43 +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 74 insertions(+)
  
Steven Rostedt Oct. 24, 2022, 4:19 p.m. UTC | #2
On Tue, 18 Oct 2022 15:56:42 +0200
Maxime Ripard <maxime@cerno.tech> wrote:

I know I already reviewed this, but looking at it again I noticed:


> +DECLARE_EVENT_CLASS(clk_rate_request,
> +
> +	TP_PROTO(struct clk_rate_request *req),
> +
> +	TP_ARGS(req),
> +
> +	TP_STRUCT__entry(
> +		__string(        name, req->core ? req->core->name : "none")
> +		__field(unsigned long,           min                       )
> +		__field(unsigned long,           max                       )
> +		__string(       pname, req->best_parent_hw ? clk_hw_get_name(req->best_parent_hw) : "none" )

It may be best to move the two __string() declarations together. The reason
is that dynamic strings (which is what __string() creates) use 4 bytes embedded
in the first part of the event: two bytes for the offset at which the dynamic
string lives, and two bytes for the string's length.

On 64-bit machines the above has:

	__string()		4 bytes
	__field(unsigned long)	8 bytes
	__field(unsigned long)	8 bytes
	__string()		4 bytes

and then another unsigned long field below, which is another 8 bytes.

> +		__field(unsigned long,           prate                     )
> +	),
> +
> +	

As compilers tend to use word alignment, the above turns into:

	__string()		4 bytes

	__PADDING__		4 bytes

	__field(unsigned long)	8 bytes
	__field(unsigned long)	8 bytes
	__string()		4 bytes

	__PADDING__		4 bytes

	__field(unsigned long)	8 bytes


That leaves 8 bytes of padding in the event, wasting precious ring buffer
space. By changing the event to:

	TP_STRUCT__entry(
		__string(        name, req->core ? req->core->name : "none")
		__string(       pname, req->best_parent_hw ? clk_hw_get_name(req->best_parent_hw) : "none" )
		__field(unsigned long,           min                       )
		__field(unsigned long,           max                       )
		__field(unsigned long,           prate                     )
	),

It will turn the layout into:

	__string()		4 bytes
	__string()		4 bytes
	__field(unsigned long)	8 bytes
	__field(unsigned long)	8 bytes
	__field(unsigned long)	8 bytes

With no padding and no wasted space.

I would suggest changing this.
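
Roughly speaking, the two orderings compare like the structs below (a sketch
assuming a 64-bit kernel; the struct and field names are made up for
illustration, only the ordering matters, and the real generated entry also
carries a common header in front):

	/* original order: __string() and unsigned long fields interleaved */
	struct entry_before {
		u32 name;		/* __string(): 2-byte offset + 2-byte length */
		/* 4 bytes of implicit padding */
		unsigned long min;
		unsigned long max;
		u32 pname;		/* __string() */
		/* 4 bytes of implicit padding */
		unsigned long prate;
	};				/* 40 bytes */

	/* suggested order: the two __string() fields grouped first */
	struct entry_after {
		u32 name;		/* __string() */
		u32 pname;		/* __string() */
		unsigned long min;
		unsigned long max;
		unsigned long prate;
	};				/* 32 bytes, no padding */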

-- Steve
  

Patch

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index bc6f964cb676..343c50e7e214 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -603,10 +603,15 @@  int clk_mux_determine_rate_flags(struct clk_hw *hw,
 			}
 
 			clk_core_forward_rate_req(core, req, parent, &parent_req, req->rate);
+
+			trace_clk_rate_request_start(&parent_req);
+
 			ret = clk_core_round_rate_nolock(parent, &parent_req);
 			if (ret)
 				return ret;
 
+			trace_clk_rate_request_done(&parent_req);
+
 			best = parent_req.rate;
 		} else if (parent) {
 			best = clk_core_get_rate_nolock(parent);
@@ -630,10 +635,15 @@  int clk_mux_determine_rate_flags(struct clk_hw *hw,
 			struct clk_rate_request parent_req;
 
 			clk_core_forward_rate_req(core, req, parent, &parent_req, req->rate);
+
+			trace_clk_rate_request_start(&parent_req);
+
 			ret = clk_core_round_rate_nolock(parent, &parent_req);
 			if (ret)
 				continue;
 
+			trace_clk_rate_request_done(&parent_req);
+
 			parent_rate = parent_req.rate;
 		} else {
 			parent_rate = clk_core_get_rate_nolock(parent);
@@ -1551,10 +1561,15 @@  static int clk_core_round_rate_nolock(struct clk_core *core,
 		struct clk_rate_request parent_req;
 
 		clk_core_forward_rate_req(core, req, core->parent, &parent_req, req->rate);
+
+		trace_clk_rate_request_start(&parent_req);
+
 		ret = clk_core_round_rate_nolock(core->parent, &parent_req);
 		if (ret)
 			return ret;
 
+		trace_clk_rate_request_done(&parent_req);
+
 		req->best_parent_rate = parent_req.rate;
 		req->rate = parent_req.rate;
 
@@ -1605,10 +1620,14 @@  unsigned long clk_hw_round_rate(struct clk_hw *hw, unsigned long rate)
 
 	clk_core_init_rate_req(hw->core, &req, rate);
 
+	trace_clk_rate_request_start(&req);
+
 	ret = clk_core_round_rate_nolock(hw->core, &req);
 	if (ret)
 		return 0;
 
+	trace_clk_rate_request_done(&req);
+
 	return req.rate;
 }
 EXPORT_SYMBOL_GPL(clk_hw_round_rate);
@@ -1637,8 +1656,12 @@  long clk_round_rate(struct clk *clk, unsigned long rate)
 
 	clk_core_init_rate_req(clk->core, &req, rate);
 
+	trace_clk_rate_request_start(&req);
+
 	ret = clk_core_round_rate_nolock(clk->core, &req);
 
+	trace_clk_rate_request_done(&req);
+
 	if (clk->exclusive_count)
 		clk_core_rate_protect(clk->core);
 
@@ -2130,10 +2153,14 @@  static struct clk_core *clk_calc_new_rates(struct clk_core *core,
 
 		clk_core_init_rate_req(core, &req, rate);
 
+		trace_clk_rate_request_start(&req);
+
 		ret = clk_core_determine_round_nolock(core, &req);
 		if (ret < 0)
 			return NULL;
 
+		trace_clk_rate_request_done(&req);
+
 		best_parent_rate = req.best_parent_rate;
 		new_rate = req.rate;
 		parent = req.best_parent_hw ? req.best_parent_hw->core : NULL;
@@ -2329,8 +2356,12 @@  static unsigned long clk_core_req_round_rate_nolock(struct clk_core *core,
 
 	clk_core_init_rate_req(core, &req, req_rate);
 
+	trace_clk_rate_request_start(&req);
+
 	ret = clk_core_round_rate_nolock(core, &req);
 
+	trace_clk_rate_request_done(&req);
+
 	/* restore the protection */
 	clk_core_rate_restore_protect(core, cnt);
 
diff --git a/include/trace/events/clk.h b/include/trace/events/clk.h
index e19edc63ee95..b19f5a549219 100644
--- a/include/trace/events/clk.h
+++ b/include/trace/events/clk.h
@@ -264,6 +264,49 @@  DEFINE_EVENT(clk_duty_cycle, clk_set_duty_cycle_complete,
 	TP_ARGS(core, duty)
 );
 
+DECLARE_EVENT_CLASS(clk_rate_request,
+
+	TP_PROTO(struct clk_rate_request *req),
+
+	TP_ARGS(req),
+
+	TP_STRUCT__entry(
+		__string(        name, req->core ? req->core->name : "none")
+		__field(unsigned long,           min                       )
+		__field(unsigned long,           max                       )
+		__string(       pname, req->best_parent_hw ? clk_hw_get_name(req->best_parent_hw) : "none" )
+		__field(unsigned long,           prate                     )
+	),
+
+	TP_fast_assign(
+		__assign_str(name, req->core ? req->core->name : "none");
+		__assign_str(pname, req->best_parent_hw ? clk_hw_get_name(req->best_parent_hw) : "none");
+		__entry->min = req->min_rate;
+		__entry->max = req->max_rate;
+		__entry->prate = req->best_parent_rate;
+	),
+
+	TP_printk("%s min %lu max %lu, parent %s (%lu)", __get_str(name),
+		  (unsigned long)__entry->min,
+		  (unsigned long)__entry->max,
+		  __get_str(pname),
+		  (unsigned long)__entry->prate)
+);
+
+DEFINE_EVENT(clk_rate_request, clk_rate_request_start,
+
+	TP_PROTO(struct clk_rate_request *req),
+
+	TP_ARGS(req)
+);
+
+DEFINE_EVENT(clk_rate_request, clk_rate_request_done,
+
+	TP_PROTO(struct clk_rate_request *req),
+
+	TP_ARGS(req)
+);
+
 #endif /* _TRACE_CLK_H */
 
 /* This part must be outside protection */