[-next,v3] drivers: base: cacheinfo: fix shared_cpu_map

Message ID 20221228032419.1763-2-yongxuan.wang@sifive.com
State New
Headers
Series [-next,v3] drivers: base: cacheinfo: fix shared_cpu_map |

Commit Message

Yong-Xuan Wang Dec. 28, 2022, 3:24 a.m. UTC
  The cacheinfo code sets up the shared_cpu_map by checking whether the caches
with the same index are shared between CPUs. However, this triggers a
slab-out-of-bounds access if the CPUs do not have the same cache hierarchy.
Another problem is a mismatched shared_cpu_map when the shared cache does
not have the same index on every CPU.

CPU0	I	D	L3
index	0	1	2	x
	^	^	^	^
index	0	1	2	3
CPU1	I	D	L2	L3

This patch checks whether each cache is shared with any of the caches on the
other CPUs, regardless of their index.

Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
---
 drivers/base/cacheinfo.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)


base-commit: c76083fac3bae1a87ae3d005b5cb1cbc761e31d5
  

Comments

Sudeep Holla Jan. 4, 2023, 10:59 a.m. UTC | #1
On Wed, Dec 28, 2022 at 03:24:19AM +0000, Yong-Xuan Wang wrote:
> The cacheinfo sets up the shared_cpu_map by checking whether the caches
> with the same index are shared between CPUs. However, this will trigger
> slab-out-of-bounds access if the CPUs do not have the same cache hierarchy.
> Another problem is the mismatched shared_cpu_map when the shared cache does
> not have the same index between CPUs.
> 
> CPU0	I	D	L3
> index	0	1	2	x
> 	^	^	^	^
> index	0	1	2	3
> CPU1	I	D	L2	L3
> 
> This patch checks each cache is shared with all caches on other CPUs.
> 

Just curious to know whether this is just a QEMU config or a real platform.
I had intentionally not supported this, just to get to know when such
h/w appears in the real world 😁.

> Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
> Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
> ---
>  drivers/base/cacheinfo.c | 25 +++++++++++++++----------
>  1 file changed, 15 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
> index 950b22cdb5f7..dfa804bcf3cc 100644
> --- a/drivers/base/cacheinfo.c
> +++ b/drivers/base/cacheinfo.c
> @@ -256,7 +256,7 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
>  {
>  	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
>  	struct cacheinfo *this_leaf, *sib_leaf;
> -	unsigned int index;
> +	unsigned int index, sib_index;
>  	int ret = 0;
>  
>  	if (this_cpu_ci->cpu_map_populated)
> @@ -284,11 +284,12 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
>  
>  			if (i == cpu || !sib_cpu_ci->info_list)
>  				continue;/* skip if itself or no cacheinfo */
> -
> -			sib_leaf = per_cpu_cacheinfo_idx(i, index);
> -			if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
> -				cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
> -				cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
> +			for (sib_index = 0; sib_index < cache_leaves(i); sib_index++) {
> +				sib_leaf = per_cpu_cacheinfo_idx(i, sib_index);
> +				if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
> +					cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
> +					cpumask_set_cpu(i, &this_leaf->shared_cpu_map);

Does it make sense to break here once we match, as it is unlikely to match
with any other indices?

> +				}
>  			}
>  		}
>  		/* record the maximum cache line size */
> @@ -302,7 +303,7 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
>  static void cache_shared_cpu_map_remove(unsigned int cpu)
>  {
>  	struct cacheinfo *this_leaf, *sib_leaf;
> -	unsigned int sibling, index;
> +	unsigned int sibling, index, sib_index;
>  
>  	for (index = 0; index < cache_leaves(cpu); index++) {
>  		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
> @@ -313,9 +314,13 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
>  			if (sibling == cpu || !sib_cpu_ci->info_list)
>  				continue;/* skip if itself or no cacheinfo */
>  
> -			sib_leaf = per_cpu_cacheinfo_idx(sibling, index);
> -			cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
> -			cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
> +			for (sib_index = 0; sib_index < cache_leaves(sibling); sib_index++) {
> +				sib_leaf = per_cpu_cacheinfo_idx(sibling, sib_index);
> +				if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
> +					cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
> +					cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);

Same comment as above.
  
Yong-Xuan Wang Jan. 17, 2023, 10:02 a.m. UTC | #2
Hi Sudeep,

> On Wed, Jan 4, 2023 at 6:59 PM Sudeep Holla <sudeep.holla@arm.com> wrote:
>
> On Wed, Dec 28, 2022 at 03:24:19AM +0000, Yong-Xuan Wang wrote:
> > The cacheinfo sets up the shared_cpu_map by checking whether the caches
> > with the same index are shared between CPUs. However, this will trigger
> > slab-out-of-bounds access if the CPUs do not have the same cache hierarchy.
> > Another problem is the mismatched shared_cpu_map when the shared cache does
> > not have the same index between CPUs.
> >
> > CPU0  I       D       L3
> > index 0       1       2       x
> >       ^       ^       ^       ^
> > index 0       1       2       3
> > CPU1  I       D       L2      L3
> >
> > This patch checks each cache is shared with all caches on other CPUs.
> >
>
> Just curious to know if this is just Qemu config or a real platform.
> I had intentionally not supported this to just to get to know when such
> h/w appears in the real world 😁.
>

We are trying to build this kind of config in QEMU.

> > Reviewed-by: Pierre Gondois <pierre.gondois@arm.com>
> > Signed-off-by: Yong-Xuan Wang <yongxuan.wang@sifive.com>
> > ---
> >  drivers/base/cacheinfo.c | 25 +++++++++++++++----------
> >  1 file changed, 15 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
> > index 950b22cdb5f7..dfa804bcf3cc 100644
> > --- a/drivers/base/cacheinfo.c
> > +++ b/drivers/base/cacheinfo.c
> > @@ -256,7 +256,7 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
> >  {
> >       struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
> >       struct cacheinfo *this_leaf, *sib_leaf;
> > -     unsigned int index;
> > +     unsigned int index, sib_index;
> >       int ret = 0;
> >
> >       if (this_cpu_ci->cpu_map_populated)
> > @@ -284,11 +284,12 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
> >
> >                       if (i == cpu || !sib_cpu_ci->info_list)
> >                               continue;/* skip if itself or no cacheinfo */
> > -
> > -                     sib_leaf = per_cpu_cacheinfo_idx(i, index);
> > -                     if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
> > -                             cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
> > -                             cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
> > +                     for (sib_index = 0; sib_index < cache_leaves(i); sib_index++) {
> > +                             sib_leaf = per_cpu_cacheinfo_idx(i, sib_index);
> > +                             if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
> > +                                     cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
> > +                                     cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
>
> Does it make sense to break here once we match as it is unlikely to match
> with any other indices ?
>

Yeah. We can break here once we find the shared instance. I'll send a
new version to fix it.
Thank you!

> > +                             }
> >                       }
> >               }
> >               /* record the maximum cache line size */
> > @@ -302,7 +303,7 @@ static int cache_shared_cpu_map_setup(unsigned int cpu)
> >  static void cache_shared_cpu_map_remove(unsigned int cpu)
> >  {
> >       struct cacheinfo *this_leaf, *sib_leaf;
> > -     unsigned int sibling, index;
> > +     unsigned int sibling, index, sib_index;
> >
> >       for (index = 0; index < cache_leaves(cpu); index++) {
> >               this_leaf = per_cpu_cacheinfo_idx(cpu, index);
> > @@ -313,9 +314,13 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
> >                       if (sibling == cpu || !sib_cpu_ci->info_list)
> >                               continue;/* skip if itself or no cacheinfo */
> >
> > -                     sib_leaf = per_cpu_cacheinfo_idx(sibling, index);
> > -                     cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
> > -                     cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
> > +                     for (sib_index = 0; sib_index < cache_leaves(sibling); sib_index++) {
> > +                             sib_leaf = per_cpu_cacheinfo_idx(sibling, sib_index);
> > +                             if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
> > +                                     cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
> > +                                     cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
>
> Same comment as above.
>
> --
> Regards,
> Sudeep

Regards,
Yong-Xuan
  

Patch

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 950b22cdb5f7..dfa804bcf3cc 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -256,7 +256,7 @@  static int cache_shared_cpu_map_setup(unsigned int cpu)
 {
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct cacheinfo *this_leaf, *sib_leaf;
-	unsigned int index;
+	unsigned int index, sib_index;
 	int ret = 0;
 
 	if (this_cpu_ci->cpu_map_populated)
@@ -284,11 +284,12 @@  static int cache_shared_cpu_map_setup(unsigned int cpu)
 
 			if (i == cpu || !sib_cpu_ci->info_list)
 				continue;/* skip if itself or no cacheinfo */
-
-			sib_leaf = per_cpu_cacheinfo_idx(i, index);
-			if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
-				cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
-				cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
+			for (sib_index = 0; sib_index < cache_leaves(i); sib_index++) {
+				sib_leaf = per_cpu_cacheinfo_idx(i, sib_index);
+				if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
+					cpumask_set_cpu(cpu, &sib_leaf->shared_cpu_map);
+					cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
+				}
 			}
 		}
 		/* record the maximum cache line size */
@@ -302,7 +303,7 @@  static int cache_shared_cpu_map_setup(unsigned int cpu)
 static void cache_shared_cpu_map_remove(unsigned int cpu)
 {
 	struct cacheinfo *this_leaf, *sib_leaf;
-	unsigned int sibling, index;
+	unsigned int sibling, index, sib_index;
 
 	for (index = 0; index < cache_leaves(cpu); index++) {
 		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
@@ -313,9 +314,13 @@  static void cache_shared_cpu_map_remove(unsigned int cpu)
 			if (sibling == cpu || !sib_cpu_ci->info_list)
 				continue;/* skip if itself or no cacheinfo */
 
-			sib_leaf = per_cpu_cacheinfo_idx(sibling, index);
-			cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
-			cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
+			for (sib_index = 0; sib_index < cache_leaves(sibling); sib_index++) {
+				sib_leaf = per_cpu_cacheinfo_idx(sibling, sib_index);
+				if (cache_leaves_are_shared(this_leaf, sib_leaf)) {
+					cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
+					cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
+				}
+			}
 		}
 	}
 }