ARM: sh-mobile: Use of_cpu_node_to_id() to read CPU node 'reg'

Message ID 20230319150027.66475-1-robh@kernel.org
State New
Headers
Series ARM: sh-mobile: Use of_cpu_node_to_id() to read CPU node 'reg' |

Commit Message

Rob Herring March 19, 2023, 3 p.m. UTC
  Replace open coded CPU nodes reading of "reg" and translation to logical
ID with of_cpu_node_to_id().

Signed-off-by: Rob Herring <robh@kernel.org>
---
 arch/arm/mach-shmobile/platsmp-apmu.c | 34 +++++++++++----------------
 1 file changed, 14 insertions(+), 20 deletions(-)
  

Comments

Geert Uytterhoeven March 20, 2023, 7:22 p.m. UTC | #1
Hi Rob,

On Sun, Mar 19, 2023 at 4:00 PM Rob Herring <robh@kernel.org> wrote:
> Replace open coded CPU nodes reading of "reg" and translation to logical
> ID with of_cpu_node_to_id().
>
> Signed-off-by: Rob Herring <robh@kernel.org>

Thanks for your patch!

> --- a/arch/arm/mach-shmobile/platsmp-apmu.c
> +++ b/arch/arm/mach-shmobile/platsmp-apmu.c
> @@ -10,6 +10,7 @@
>  #include <linux/init.h>
>  #include <linux/io.h>
>  #include <linux/ioport.h>
> +#include <linux/of.h>
>  #include <linux/of_address.h>
>  #include <linux/smp.h>
>  #include <linux/suspend.h>
> @@ -210,7 +211,6 @@ static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit))
>         struct device_node *np_apmu, *np_cpu;
>         struct resource res;
>         int bit, index;
> -       u32 id;
>
>         for_each_matching_node(np_apmu, apmu_ids) {
>                 /* only enable the cluster that includes the boot CPU */
> @@ -218,33 +218,27 @@ static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit))
>
>                 for (bit = 0; bit < CONFIG_NR_CPUS; bit++) {

This loops over all CPUs....

>                         np_cpu = of_parse_phandle(np_apmu, "cpus", bit);
> -                       if (np_cpu) {
> -                               if (!of_property_read_u32(np_cpu, "reg", &id)) {
> -                                       if (id == cpu_logical_map(0)) {
> -                                               is_allowed = true;
> -                                               of_node_put(np_cpu);
> -                                               break;
> -                                       }
> -
> -                               }
> +                       if (np_cpu && of_cpu_node_to_id(np_cpu) == 0) {

As of_cpu_node_to_id() uses for_each_possible_cpu(), you're
converting an O(n) operation to O(n²).  I'm sure this can be done
more efficiently, using for_each_possible_cpu() as the outer loop?

Meh, cpu_logical_map() also loops over all CPUs, so it was already
O(n²)... Still, we should do better...

> +                               is_allowed = true;
>                                 of_node_put(np_cpu);
> +                               break;
>                         }
> +                       of_node_put(np_cpu);
>                 }
>                 if (!is_allowed)
>                         continue;
>
>                 for (bit = 0; bit < CONFIG_NR_CPUS; bit++) {
>                         np_cpu = of_parse_phandle(np_apmu, "cpus", bit);
> -                       if (np_cpu) {
> -                               if (!of_property_read_u32(np_cpu, "reg", &id)) {
> -                                       index = get_logical_index(id);
> -                                       if ((index >= 0) &&
> -                                           !of_address_to_resource(np_apmu,
> -                                                                   0, &res))
> -                                               fn(&res, index, bit);
> -                               }
> -                               of_node_put(np_cpu);
> -                       }
> +                       if (!np_cpu)
> +                               continue;
> +
> +                       index = of_cpu_node_to_id(np_cpu);

Likewise.

> +                       if ((index >= 0) &&
> +                           !of_address_to_resource(np_apmu, 0, &res))
> +                               fn(&res, index, bit);
> +
> +                       of_node_put(np_cpu);
>                 }
>         }
>  }

Gr{oetje,eeting}s,

                        Geert
  
Rob Herring March 20, 2023, 10:34 p.m. UTC | #2
On Mon, Mar 20, 2023 at 2:22 PM Geert Uytterhoeven <geert@linux-m68k.org> wrote:
>
> Hi Rob,
>
> On Sun, Mar 19, 2023 at 4:00 PM Rob Herring <robh@kernel.org> wrote:
> > Replace open coded CPU nodes reading of "reg" and translation to logical
> > ID with of_cpu_node_to_id().
> >
> > Signed-off-by: Rob Herring <robh@kernel.org>
>
> Thanks for your patch!
>
> > --- a/arch/arm/mach-shmobile/platsmp-apmu.c
> > +++ b/arch/arm/mach-shmobile/platsmp-apmu.c
> > @@ -10,6 +10,7 @@
> >  #include <linux/init.h>
> >  #include <linux/io.h>
> >  #include <linux/ioport.h>
> > +#include <linux/of.h>
> >  #include <linux/of_address.h>
> >  #include <linux/smp.h>
> >  #include <linux/suspend.h>
> > @@ -210,7 +211,6 @@ static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit))
> >         struct device_node *np_apmu, *np_cpu;
> >         struct resource res;
> >         int bit, index;
> > -       u32 id;
> >
> >         for_each_matching_node(np_apmu, apmu_ids) {
> >                 /* only enable the cluster that includes the boot CPU */
> > @@ -218,33 +218,27 @@ static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit))
> >
> >                 for (bit = 0; bit < CONFIG_NR_CPUS; bit++) {
>
> This loops over all CPUs....
>
> >                         np_cpu = of_parse_phandle(np_apmu, "cpus", bit);

Have you looked at what this does? :)

> > -                       if (np_cpu) {
> > -                               if (!of_property_read_u32(np_cpu, "reg", &id)) {
> > -                                       if (id == cpu_logical_map(0)) {
> > -                                               is_allowed = true;
> > -                                               of_node_put(np_cpu);
> > -                                               break;
> > -                                       }
> > -
> > -                               }
> > +                       if (np_cpu && of_cpu_node_to_id(np_cpu) == 0) {
>
> As of_cpu_node_to_id() uses for_each_possible_cpu(), you're
> converting an O(n) operation to O(n²).  I'm sure this can be done
> more efficiently, using for_each_possible_cpu() as the outer loop?
>
> Meh, cpu_logical_map() also loops over all CPUs, so it was already
> O(n²)... Still, we should do better...

Can you measure the performance impact?

I would assume 'cpus' is less than NR_CPUS or possible cpus. We should
cap the outer loop based on 'cpus' length. The simplest fix is bail
when of_parse_phandle() fails. That will work unless you expect empty
entries in 'cpus':

cpus = <&cpu0>, <0>, <&cpu3>;

Otherwise, we'd need to use of_parse_phandle_with_fixed_args() instead
to distinguish empty entry vs. the end of the list. There is a
of_for_each_phandle() iterator which would avoid walking the 'cpus'
entry each time from the beginning, but it's a bit more complicated
since it handles arg cells.

This is the simple fix:

diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c
b/arch/arm/mach-shmobile/platsmp-apmu.c
index 27cfe753c467..ec6f421c0f4d 100644
--- a/arch/arm/mach-shmobile/platsmp-apmu.c
+++ b/arch/arm/mach-shmobile/platsmp-apmu.c
@@ -218,7 +218,9 @@ static void apmu_parse_dt(void (*fn)(struct
resource *res, int cpu, int bit))

                for (bit = 0; bit < CONFIG_NR_CPUS; bit++) {
                        np_cpu = of_parse_phandle(np_apmu, "cpus", bit);
-                       if (np_cpu && of_cpu_node_to_id(np_cpu) == 0) {
+                       if (!np_cpu)
+                               break;
+                       if (of_cpu_node_to_id(np_cpu) == 0) {
                                is_allowed = true;
                                of_node_put(np_cpu);
                                break;
@@ -231,7 +233,7 @@ static void apmu_parse_dt(void (*fn)(struct
resource *res, int cpu, int bit))
                for (bit = 0; bit < CONFIG_NR_CPUS; bit++) {
                        np_cpu = of_parse_phandle(np_apmu, "cpus", bit);
                        if (!np_cpu)
-                               continue;
+                               break;

                        index = of_cpu_node_to_id(np_cpu);
                        if ((index >= 0) &&
  

Patch

diff --git a/arch/arm/mach-shmobile/platsmp-apmu.c b/arch/arm/mach-shmobile/platsmp-apmu.c
index e771ce70e132..27cfe753c467 100644
--- a/arch/arm/mach-shmobile/platsmp-apmu.c
+++ b/arch/arm/mach-shmobile/platsmp-apmu.c
@@ -10,6 +10,7 @@ 
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/smp.h>
 #include <linux/suspend.h>
@@ -210,7 +211,6 @@  static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit))
 	struct device_node *np_apmu, *np_cpu;
 	struct resource res;
 	int bit, index;
-	u32 id;
 
 	for_each_matching_node(np_apmu, apmu_ids) {
 		/* only enable the cluster that includes the boot CPU */
@@ -218,33 +218,27 @@  static void apmu_parse_dt(void (*fn)(struct resource *res, int cpu, int bit))
 
 		for (bit = 0; bit < CONFIG_NR_CPUS; bit++) {
 			np_cpu = of_parse_phandle(np_apmu, "cpus", bit);
-			if (np_cpu) {
-				if (!of_property_read_u32(np_cpu, "reg", &id)) {
-					if (id == cpu_logical_map(0)) {
-						is_allowed = true;
-						of_node_put(np_cpu);
-						break;
-					}
-
-				}
+			if (np_cpu && of_cpu_node_to_id(np_cpu) == 0) {
+				is_allowed = true;
 				of_node_put(np_cpu);
+				break;
 			}
+			of_node_put(np_cpu);
 		}
 		if (!is_allowed)
 			continue;
 
 		for (bit = 0; bit < CONFIG_NR_CPUS; bit++) {
 			np_cpu = of_parse_phandle(np_apmu, "cpus", bit);
-			if (np_cpu) {
-				if (!of_property_read_u32(np_cpu, "reg", &id)) {
-					index = get_logical_index(id);
-					if ((index >= 0) &&
-					    !of_address_to_resource(np_apmu,
-								    0, &res))
-						fn(&res, index, bit);
-				}
-				of_node_put(np_cpu);
-			}
+			if (!np_cpu)
+				continue;
+
+			index = of_cpu_node_to_id(np_cpu);
+			if ((index >= 0) &&
+			    !of_address_to_resource(np_apmu, 0, &res))
+				fn(&res, index, bit);
+
+			of_node_put(np_cpu);
 		}
 	}
 }