[v4] clk: expand clk_ignore_unused mechanism to keep only a few clks on

Message ID 20230329204632.lsiiqf42hrwmn6xm@pengutronix.de
State New
Headers
Series [v4] clk: expand clk_ignore_unused mechanism to keep only a few clks on |

Commit Message

Uwe Kleine-König March 29, 2023, 8:46 p.m. UTC
  Allow to pass an integer n that results in only keeping n unused clocks
enabled.

This helps to debug the problem if you only know that clk_ignore_unused
helps but you have no clue yet which clock is the culprit.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
---
Hello Stephen,

On Wed, Mar 29, 2023 at 12:49:19PM -0700, Stephen Boyd wrote:
> Quoting Uwe Kleine-König (2022-10-26 08:18:12)
> >  Documentation/driver-api/clk.rst |  4 +++-
> 
> No update to Documentation/admin-guide/kernel-parameters.txt?

Fair request. I mentioned it there that you can assign an integer but
refer to Documentation/driver-api/clk.rst for the details. 

> > diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
> > index c3c3f8c07258..356119a7e5fe 100644
> > --- a/drivers/clk/clk.c
> > +++ b/drivers/clk/clk.c
> > [...]
> > @@ -1352,12 +1354,17 @@ static void __init clk_disable_unused_subtree(struct clk_core *core)
> >          * back to .disable
> >          */
> >         if (clk_core_is_enabled(core)) {
> > -               trace_clk_disable(core);
> > -               if (core->ops->disable_unused)
> > -                       core->ops->disable_unused(core->hw);
> > -               else if (core->ops->disable)
> > -                       core->ops->disable(core->hw);
> > -               trace_clk_disable_complete(core);
> > +               if (clk_unused_keep_on) {
> > +                       pr_warn("Keep unused clk \"%s\" on\n", core->name);
> > +                       clk_unused_keep_on -= 1;
> > +               } else {
> > +                       trace_clk_disable(core);
> 
> We have trace_clk_disable() here. Can you have this tracepoint print to
> the kernel log and watch over serial console? That would be faster than
> bisecting.

Well no, that doesn't work for all the problems where
clk_ignore_unused=7 could be useful. Consider that e.g. you know that
eth0 is broken, but with clk_ignore_unused is works. So one of the (say)
25 nominally unused clks are required for eth0. But it's not possible to
test the network after each of the 25 clk_disable()s. Unless I'm missing
something and you can hook a userspace action on a trace line?!

> > +                       if (core->ops->disable_unused)
> > +                               core->ops->disable_unused(core->hw);
> > +                       else if (core->ops->disable)
> > +                               core->ops->disable(core->hw);
> > +                       trace_clk_disable_complete(core);
> > +               }
> >         }
> >  
> >  unlock_out:
> > @@ -1369,9 +1376,17 @@ static void __init clk_disable_unused_subtree(struct clk_core *core)
> >  }
> >  
> >  static bool clk_ignore_unused __initdata;
> > -static int __init clk_ignore_unused_setup(char *__unused)
> > +static int __init clk_ignore_unused_setup(char *keep)
> >  {
> > -       clk_ignore_unused = true;
> > +       if (*keep == '=') {
> > +               int ret;
> > +
> > +               ret = kstrtouint(keep + 1, 0, &clk_unused_keep_on);
> > +               if (ret < 0)
> 
> Could omit 'ret' and just have if (kstrtouint(..))

I don't feel strong, but I think having that on two lines is easier to
read. So I kept it as is, but if you insist, I can change.

> > +                       pr_err("Warning: failed to parse clk_ignore_unused parameter, ignoring");
> 
> Missing newline on printk.

fixed.

Best regards
Uwe

 .../admin-guide/kernel-parameters.txt         |  6 ++--
 Documentation/driver-api/clk.rst              |  4 ++-
 drivers/clk/clk.c                             | 33 ++++++++++++++-----
 3 files changed, 32 insertions(+), 11 deletions(-)
  

Comments

Stephen Boyd March 29, 2023, 9:27 p.m. UTC | #1
Quoting Uwe Kleine-König (2023-03-29 13:46:32)
> > > diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
> > > index c3c3f8c07258..356119a7e5fe 100644
> > > --- a/drivers/clk/clk.c
> > > +++ b/drivers/clk/clk.c
> > > [...]
> > > @@ -1352,12 +1354,17 @@ static void __init clk_disable_unused_subtree(struct clk_core *core)
> > >          * back to .disable
> > >          */
> > >         if (clk_core_is_enabled(core)) {
> > > -               trace_clk_disable(core);
> > > -               if (core->ops->disable_unused)
> > > -                       core->ops->disable_unused(core->hw);
> > > -               else if (core->ops->disable)
> > > -                       core->ops->disable(core->hw);
> > > -               trace_clk_disable_complete(core);
> > > +               if (clk_unused_keep_on) {
> > > +                       pr_warn("Keep unused clk \"%s\" on\n", core->name);
> > > +                       clk_unused_keep_on -= 1;
> > > +               } else {
> > > +                       trace_clk_disable(core);
> > 
> > We have trace_clk_disable() here. Can you have this tracepoint print to
> > the kernel log and watch over serial console? That would be faster than
> > bisecting.
> 
> Well no, that doesn't work for all the problems where
> clk_ignore_unused=7 could be useful. Consider that e.g. you know that
> eth0 is broken, but with clk_ignore_unused is works. So one of the (say)
> 25 nominally unused clks are required for eth0. But it's not possible to
> test the network after each of the 25 clk_disable()s. Unless I'm missing
> something and you can hook a userspace action on a trace line?!

In that case it sounds like you want to compile the kernel with the
support for enabling clks from debugfs. Can you use that?
  
Uwe Kleine-König March 30, 2023, 6:06 a.m. UTC | #2
On Wed, Mar 29, 2023 at 02:27:08PM -0700, Stephen Boyd wrote:
> Quoting Uwe Kleine-König (2023-03-29 13:46:32)
> > > > diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
> > > > index c3c3f8c07258..356119a7e5fe 100644
> > > > --- a/drivers/clk/clk.c
> > > > +++ b/drivers/clk/clk.c
> > > > [...]
> > > > @@ -1352,12 +1354,17 @@ static void __init clk_disable_unused_subtree(struct clk_core *core)
> > > >          * back to .disable
> > > >          */
> > > >         if (clk_core_is_enabled(core)) {
> > > > -               trace_clk_disable(core);
> > > > -               if (core->ops->disable_unused)
> > > > -                       core->ops->disable_unused(core->hw);
> > > > -               else if (core->ops->disable)
> > > > -                       core->ops->disable(core->hw);
> > > > -               trace_clk_disable_complete(core);
> > > > +               if (clk_unused_keep_on) {
> > > > +                       pr_warn("Keep unused clk \"%s\" on\n", core->name);
> > > > +                       clk_unused_keep_on -= 1;
> > > > +               } else {
> > > > +                       trace_clk_disable(core);
> > > 
> > > We have trace_clk_disable() here. Can you have this tracepoint print to
> > > the kernel log and watch over serial console? That would be faster than
> > > bisecting.
> > 
> > Well no, that doesn't work for all the problems where
> > clk_ignore_unused=7 could be useful. Consider that e.g. you know that
> > eth0 is broken, but with clk_ignore_unused is works. So one of the (say)
> > 25 nominally unused clks are required for eth0. But it's not possible to
> > test the network after each of the 25 clk_disable()s. Unless I'm missing
> > something and you can hook a userspace action on a trace line?!
> 
> In that case it sounds like you want to compile the kernel with the
> support for enabling clks from debugfs. Can you use that?

In some of the cases that might work. Unless for example the problem
makes the kernel fail to boot or the device is broken when the clk was
disabled and reenable doesn't help?!

Best regards
Uwe
  
Sebastian Reichel March 30, 2023, 3:26 p.m. UTC | #3
Hi,

On Thu, Mar 30, 2023 at 08:06:01AM +0200, Uwe Kleine-König wrote:
> On Wed, Mar 29, 2023 at 02:27:08PM -0700, Stephen Boyd wrote:
> > Quoting Uwe Kleine-König (2023-03-29 13:46:32)
> > > > > diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
> > > > > index c3c3f8c07258..356119a7e5fe 100644
> > > > > --- a/drivers/clk/clk.c
> > > > > +++ b/drivers/clk/clk.c
> > > > > [...]
> > > > > @@ -1352,12 +1354,17 @@ static void __init clk_disable_unused_subtree(struct clk_core *core)
> > > > >          * back to .disable
> > > > >          */
> > > > >         if (clk_core_is_enabled(core)) {
> > > > > -               trace_clk_disable(core);
> > > > > -               if (core->ops->disable_unused)
> > > > > -                       core->ops->disable_unused(core->hw);
> > > > > -               else if (core->ops->disable)
> > > > > -                       core->ops->disable(core->hw);
> > > > > -               trace_clk_disable_complete(core);
> > > > > +               if (clk_unused_keep_on) {
> > > > > +                       pr_warn("Keep unused clk \"%s\" on\n", core->name);
> > > > > +                       clk_unused_keep_on -= 1;
> > > > > +               } else {
> > > > > +                       trace_clk_disable(core);
> > > > 
> > > > We have trace_clk_disable() here. Can you have this tracepoint print to
> > > > the kernel log and watch over serial console? That would be faster than
> > > > bisecting.
> > > 
> > > Well no, that doesn't work for all the problems where
> > > clk_ignore_unused=7 could be useful. Consider that e.g. you know that
> > > eth0 is broken, but with clk_ignore_unused is works. So one of the (say)
> > > 25 nominally unused clks are required for eth0. But it's not possible to
> > > test the network after each of the 25 clk_disable()s. Unless I'm missing
> > > something and you can hook a userspace action on a trace line?!
> > 
> > In that case it sounds like you want to compile the kernel with the
> > support for enabling clks from debugfs. Can you use that?
> 
> In some of the cases that might work. Unless for example the problem
> makes the kernel fail to boot or the device is broken when the clk was
> disabled and reenable doesn't help?!

I recently debugged a similar issue like this:

1. build kernel with CLOCK_ALLOW_WRITE_DEBUGFS
2. boot with clk_ignore_unused, so clocks stay enabled
3. disable clocks via sysfs:
   echo 1 > /sys/kernel/debug/clk/${CLK}/clk_prepare_enable
   echo 0 > /sys/kernel/debug/clk/${CLK}/clk_prepare_enable
4. check if peripheral is still ok
5. repeat step 3 with the next 'interesting' clock

-- Sebastian
  
Uwe Kleine-König March 30, 2023, 5:33 p.m. UTC | #4
Hello Sebastian,

On Thu, Mar 30, 2023 at 05:26:17PM +0200, Sebastian Reichel wrote:
> > > In that case it sounds like you want to compile the kernel with the
> > > support for enabling clks from debugfs. Can you use that?
> > 
> > In some of the cases that might work. Unless for example the problem
> > makes the kernel fail to boot or the device is broken when the clk was
> > disabled and reenable doesn't help?!
> 
> I recently debugged a similar issue like this:
> 
> 1. build kernel with CLOCK_ALLOW_WRITE_DEBUGFS
> 2. boot with clk_ignore_unused, so clocks stay enabled
> 3. disable clocks via sysfs:
>    echo 1 > /sys/kernel/debug/clk/${CLK}/clk_prepare_enable
>    echo 0 > /sys/kernel/debug/clk/${CLK}/clk_prepare_enable
> 4. check if peripheral is still ok
> 5. repeat step 3 with the next 'interesting' clock

Ah, that makes sense. Thanks.

With that I cannot imagine a scenario that I can only debug with
clk_ignore_unused=n. So let's drop my patch.

Best regards
Uwe
  

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6221a1d057dd..1a378fe94e48 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -600,8 +600,10 @@ 
 			force such clocks to be always-on nor does it reserve
 			those clocks in any way. This parameter is useful for
 			debug and development, but should not be needed on a
-			platform with proper driver support.  For more
-			information, see Documentation/driver-api/clk.rst.
+			platform with proper driver support.
+			It can take a value (e.g. clk_ignore_unused=7), to only
+			disable some clks. For more information, see
+			Documentation/driver-api/clk.rst.
 
 	clock=		[BUGS=X86-32, HW] gettimeofday clocksource override.
 			[Deprecated]
diff --git a/Documentation/driver-api/clk.rst b/Documentation/driver-api/clk.rst
index 3cad45d14187..65ae7c3e2b33 100644
--- a/Documentation/driver-api/clk.rst
+++ b/Documentation/driver-api/clk.rst
@@ -259,7 +259,9 @@  the disabling means that the driver will remain functional while the issues
 are sorted out.
 
 To bypass this disabling, include "clk_ignore_unused" in the bootargs to the
-kernel.
+kernel. If you pass "clk_ignore_unused=n" (where n is an integer) the first n
+found clocks are not disabled which can be useful for bisecting over the unused
+clks if you don't know yet which of them is reponsible for your problem.
 
 Locking
 =======
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index ae07685c7588..87f605a4f791 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1343,6 +1343,8 @@  static void __init clk_unprepare_unused_subtree(struct clk_core *core)
 	clk_pm_runtime_put(core);
 }
 
+static unsigned clk_unused_keep_on __initdata;
+
 static void __init clk_disable_unused_subtree(struct clk_core *core)
 {
 	struct clk_core *child;
@@ -1373,12 +1375,17 @@  static void __init clk_disable_unused_subtree(struct clk_core *core)
 	 * back to .disable
 	 */
 	if (clk_core_is_enabled(core)) {
-		trace_clk_disable(core);
-		if (core->ops->disable_unused)
-			core->ops->disable_unused(core->hw);
-		else if (core->ops->disable)
-			core->ops->disable(core->hw);
-		trace_clk_disable_complete(core);
+		if (clk_unused_keep_on) {
+			pr_warn("Keep unused clk \"%s\" on\n", core->name);
+			clk_unused_keep_on -= 1;
+		} else {
+			trace_clk_disable(core);
+			if (core->ops->disable_unused)
+				core->ops->disable_unused(core->hw);
+			else if (core->ops->disable)
+				core->ops->disable(core->hw);
+			trace_clk_disable_complete(core);
+		}
 	}
 
 unlock_out:
@@ -1390,9 +1397,17 @@  static void __init clk_disable_unused_subtree(struct clk_core *core)
 }
 
 static bool clk_ignore_unused __initdata;
-static int __init clk_ignore_unused_setup(char *__unused)
+static int __init clk_ignore_unused_setup(char *keep)
 {
-	clk_ignore_unused = true;
+	if (*keep == '=') {
+		int ret;
+
+		ret = kstrtouint(keep + 1, 0, &clk_unused_keep_on);
+		if (ret < 0)
+			pr_err("Warning: failed to parse clk_ignore_unused parameter, ignoring\n");
+	} else {
+		clk_ignore_unused = true;
+	}
 	return 1;
 }
 __setup("clk_ignore_unused", clk_ignore_unused_setup);
@@ -1404,6 +1419,8 @@  static int __init clk_disable_unused(void)
 	if (clk_ignore_unused) {
 		pr_warn("clk: Not disabling unused clocks\n");
 		return 0;
+	} else if (clk_unused_keep_on) {
+		pr_warn("clk: Not disabling %u unused clocks\n", clk_unused_keep_on);
 	}
 
 	clk_prepare_lock();