[3/3] hwmon: (coretemp) Fix core count limitation

Message ID 20231127131651.476795-4-rui.zhang@intel.com
State New
Headers
Series hwmon: (coretemp) Fix core count limitation |

Commit Message

Zhang, Rui Nov. 27, 2023, 1:16 p.m. UTC
  Currently, coretemp driver only supports 128 cores per package.
This loses some core temperation information on systems that have more
than 128 cores per package.
 [   58.685033] coretemp coretemp.0: Adding Core 128 failed
 [   58.692009] coretemp coretemp.0: Adding Core 129 failed

Fix the problem by using a per package list to maintain the per core
temp_data instead of the fixed length pdata->core_data[] array.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
---
 drivers/hwmon/coretemp.c | 110 ++++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 58 deletions(-)
  

Comments

Ashok Raj Dec. 1, 2023, 2:08 a.m. UTC | #1
On Mon, Nov 27, 2023 at 09:16:51PM +0800, Zhang Rui wrote:
> Currently, coretemp driver only supports 128 cores per package.
> This loses some core temperation information on systems that have more

s/temperation/temperature

> than 128 cores per package.
>  [   58.685033] coretemp coretemp.0: Adding Core 128 failed
>  [   58.692009] coretemp coretemp.0: Adding Core 129 failed
> 
> Fix the problem by using a per package list to maintain the per core
> temp_data instead of the fixed length pdata->core_data[] array.
> 
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> ---
>  drivers/hwmon/coretemp.c | 110 ++++++++++++++++++---------------------
>  1 file changed, 52 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index cef43fedbd58..1bb1a6e4b07b 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -39,11 +39,7 @@ static int force_tjmax;
>  module_param_named(tjmax, force_tjmax, int, 0444);
>  MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
>  
> -#define PKG_SYSFS_ATTR_NO	1	/* Sysfs attribute for package temp */
> -#define BASE_SYSFS_ATTR_NO	2	/* Sysfs Base attr no for coretemp */
> -#define NUM_REAL_CORES		128	/* Number of Real cores per cpu */
>  #define CORETEMP_NAME_LENGTH	28	/* String Length of attrs */
> -#define MAX_CORE_DATA		(NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>  
>  enum coretemp_attr_index {
>  	ATTR_LABEL,
> @@ -90,17 +86,17 @@ struct temp_data {
>  	struct attribute *attrs[TOTAL_ATTRS + 1];
>  	struct attribute_group attr_group;
>  	struct mutex update_lock;
> +	struct list_head node;
>  };
>  
>  /* Platform Data per Physical CPU */
>  struct platform_data {
>  	struct device		*hwmon_dev;
>  	u16			pkg_id;
> -	u16			cpu_map[NUM_REAL_CORES];
> -	struct ida		ida;
>  	struct cpumask		cpumask;
> -	struct temp_data	*core_data[MAX_CORE_DATA];
>  	struct device_attribute name_attr;
> +	struct mutex		core_data_lock;
> +	struct list_head	core_data_list;
>  };
>  
>  struct tjmax_pci {
> @@ -491,6 +487,23 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
>  	return tdata;
>  }
>  
> +static struct temp_data *get_tdata(struct platform_data *pdata, int cpu)
> +{
> +	struct temp_data *tdata;
> +
> +	mutex_lock(&pdata->core_data_lock);
> +	list_for_each_entry(tdata, &pdata->core_data_list, node) {
> +		if (cpu >= 0 && !tdata->is_pkg_data && tdata->cpu_core_id == topology_core_id(cpu))
> +			goto found;
> +		if (cpu < 0 && tdata->is_pkg_data)
> +			goto found;
> +	}
> +	tdata = NULL;

What used to be an array, is now a list? Is it possible to get the number
of cores_per_package during initialization and allocate the per-core? You
can still get them indexing from core_id and you can possibly lose the
mutex and search?

I don't know this code well enough... Just a thought.

> +found:
> +	mutex_unlock(&pdata->core_data_lock);
> +	return tdata;
> +}
> +
>  static int create_core_data(struct platform_device *pdev, unsigned int cpu,
>  			    int pkg_flag)
>  {
> @@ -498,37 +511,29 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
>  	struct platform_data *pdata = platform_get_drvdata(pdev);
>  	struct cpuinfo_x86 *c = &cpu_data(cpu);
>  	u32 eax, edx;
> -	int err, index, attr_no;
> +	int err, attr_no;
>  
>  	if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
>  		return 0;
>  
> +	tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
> +	if (tdata)
> +		return -EEXIST;
> +
> +	tdata = init_temp_data(cpu, pkg_flag);

Is temp_data per_cpu or per_core? Wasn't sure if temp_data needs a CPU
number there along with cpu_core_id


> +	if (!tdata)
> +		return -ENOMEM;
> +
>  	/*
>  	 * Find attr number for sysfs:
>  	 * We map the attr number to core id of the CPU
>  	 * The attr number is always core id + 2
>  	 * The Pkgtemp will always show up as temp1_*, if available
>  	 */
> -	if (pkg_flag) {
> -		attr_no = PKG_SYSFS_ATTR_NO;
> -	} else {
> -		index = ida_alloc(&pdata->ida, GFP_KERNEL);
> -		if (index < 0)
> -			return index;
> -		pdata->cpu_map[index] = topology_core_id(cpu);
> -		attr_no = index + BASE_SYSFS_ATTR_NO;
> -	}
> -
> -	if (attr_no > MAX_CORE_DATA - 1) {
> -		err = -ERANGE;
> -		goto ida_free;
> -	}
> -
> -	tdata = init_temp_data(cpu, pkg_flag);
> -	if (!tdata) {
> -		err = -ENOMEM;
> -		goto ida_free;
> -	}
> +	if (pkg_flag)
> +		attr_no = 1;
> +	else
> +		attr_no = tdata->cpu_core_id + 2;
>  
>  	/* Test if we can access the status register */
>  	err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
> @@ -547,20 +552,18 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu,
>  		if (get_ttarget(tdata, &pdev->dev) >= 0)
>  			tdata->attr_size++;
>  
> -	pdata->core_data[attr_no] = tdata;
> -
>  	/* Create sysfs interfaces */
>  	err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
>  	if (err)
>  		goto exit_free;
>  
> +	mutex_lock(&pdata->core_data_lock);
> +	list_add(&tdata->node, &pdata->core_data_list);
> +	mutex_unlock(&pdata->core_data_lock);
> +
>  	return 0;
>  exit_free:
> -	pdata->core_data[attr_no] = NULL;
>  	kfree(tdata);
> -ida_free:
> -	if (!pkg_flag)
> -		ida_free(&pdata->ida, index);
>  	return err;
>  }
>  
> @@ -571,9 +574,9 @@ coretemp_add_core(struct platform_device *pdev, unsigned int cpu, int pkg_flag)
>  		dev_err(&pdev->dev, "Adding Core %u failed\n", cpu);
>  }
>  
> -static void coretemp_remove_core(struct platform_data *pdata, int indx)
> +static void coretemp_remove_core(struct platform_data *pdata, int cpu)
>  {
> -	struct temp_data *tdata = pdata->core_data[indx];
> +	struct temp_data *tdata = get_tdata(pdata, cpu);
>  
>  	/* if we errored on add then this is already gone */
>  	if (!tdata)
> @@ -582,11 +585,11 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx)
>  	/* Remove the sysfs attributes */
>  	sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
>  
> -	kfree(pdata->core_data[indx]);
> -	pdata->core_data[indx] = NULL;
> +	mutex_lock(&pdata->core_data_lock);
> +	list_del(&tdata->node);
> +	mutex_unlock(&pdata->core_data_lock);
>  
> -	if (indx >= BASE_SYSFS_ATTR_NO)
> -		ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
> +	kfree(tdata);
>  }
>  
>  static int coretemp_device_add(int zoneid)
> @@ -601,7 +604,8 @@ static int coretemp_device_add(int zoneid)
>  		return -ENOMEM;
>  
>  	pdata->pkg_id = zoneid;
> -	ida_init(&pdata->ida);
> +	mutex_init(&pdata->core_data_lock);
> +	INIT_LIST_HEAD(&pdata->core_data_list);
>  
>  	pdev = platform_device_alloc(DRVNAME, zoneid);
>  	if (!pdev) {
> @@ -629,7 +633,6 @@ static void coretemp_device_remove(int zoneid)
>  	struct platform_device *pdev = zone_devices[zoneid];
>  	struct platform_data *pdata = platform_get_drvdata(pdev);
>  
> -	ida_destroy(&pdata->ida);
>  	kfree(pdata);
>  	platform_device_unregister(pdev);
>  }
> @@ -699,7 +702,7 @@ static int coretemp_cpu_offline(unsigned int cpu)
>  	struct platform_device *pdev = coretemp_get_pdev(cpu);
>  	struct platform_data *pd;
>  	struct temp_data *tdata;
> -	int i, indx = -1, target;
> +	int target;
>  
>  	/* No need to tear down any interfaces for suspend */
>  	if (cpuhp_tasks_frozen)
> @@ -710,19 +713,10 @@ static int coretemp_cpu_offline(unsigned int cpu)
>  	if (!pd->hwmon_dev)
>  		return 0;
>  
> -	for (i = 0; i < NUM_REAL_CORES; i++) {
> -		if (pd->cpu_map[i] == topology_core_id(cpu)) {
> -			indx = i + BASE_SYSFS_ATTR_NO;
> -			break;
> -		}
> -	}
> -
> -	/* Too many cores and this core is not populated, just return */
> -	if (indx < 0)
> +	tdata = get_tdata(pd, cpu);
> +	if (!tdata)
>  		return 0;
>  
> -	tdata = pd->core_data[indx];
> -
>  	cpumask_clear_cpu(cpu, &pd->cpumask);
>  
>  	/*
> @@ -732,20 +726,20 @@ static int coretemp_cpu_offline(unsigned int cpu)
>  	 */
>  	target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
>  	if (target >= nr_cpu_ids) {
> -		coretemp_remove_core(pd, indx);
> -	} else if (tdata && tdata->cpu == cpu) {
> +		coretemp_remove_core(pd, cpu);
> +	} else if (tdata->cpu == cpu) {
>  		mutex_lock(&tdata->update_lock);
>  		tdata->cpu = target;
>  		mutex_unlock(&tdata->update_lock);
>  	}
>  
> +	tdata = get_tdata(pd, -1);
>  	/*
>  	 * If all cores in this pkg are offline, remove the interface.
>  	 */
> -	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
>  	if (cpumask_empty(&pd->cpumask)) {
>  		if (tdata)
> -			coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
> +			coretemp_remove_core(pd, -1);
>  		hwmon_device_unregister(pd->hwmon_dev);
>  		pd->hwmon_dev = NULL;
>  		return 0;
> -- 
> 2.34.1
>
  
Guenter Roeck Dec. 1, 2023, 3:06 a.m. UTC | #2
On 11/27/23 05:16, Zhang Rui wrote:
> Currently, coretemp driver only supports 128 cores per package.
> This loses some core temperation information on systems that have more
> than 128 cores per package.
>   [   58.685033] coretemp coretemp.0: Adding Core 128 failed
>   [   58.692009] coretemp coretemp.0: Adding Core 129 failed
> 
> Fix the problem by using a per package list to maintain the per core
> temp_data instead of the fixed length pdata->core_data[] array.
> 
> Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> ---
>   drivers/hwmon/coretemp.c | 110 ++++++++++++++++++---------------------
>   1 file changed, 52 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> index cef43fedbd58..1bb1a6e4b07b 100644
> --- a/drivers/hwmon/coretemp.c
> +++ b/drivers/hwmon/coretemp.c
> @@ -39,11 +39,7 @@ static int force_tjmax;
>   module_param_named(tjmax, force_tjmax, int, 0444);
>   MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
>   
> -#define PKG_SYSFS_ATTR_NO	1	/* Sysfs attribute for package temp */
> -#define BASE_SYSFS_ATTR_NO	2	/* Sysfs Base attr no for coretemp */
> -#define NUM_REAL_CORES		128	/* Number of Real cores per cpu */
>   #define CORETEMP_NAME_LENGTH	28	/* String Length of attrs */
> -#define MAX_CORE_DATA		(NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
>   
>   enum coretemp_attr_index {
>   	ATTR_LABEL,
> @@ -90,17 +86,17 @@ struct temp_data {
>   	struct attribute *attrs[TOTAL_ATTRS + 1];
>   	struct attribute_group attr_group;
>   	struct mutex update_lock;
> +	struct list_head node;
>   };
>   
>   /* Platform Data per Physical CPU */
>   struct platform_data {
>   	struct device		*hwmon_dev;
>   	u16			pkg_id;
> -	u16			cpu_map[NUM_REAL_CORES];
> -	struct ida		ida;
>   	struct cpumask		cpumask;
> -	struct temp_data	*core_data[MAX_CORE_DATA];
>   	struct device_attribute name_attr;
> +	struct mutex		core_data_lock;
> +	struct list_head	core_data_list;
>   };
>   
>   struct tjmax_pci {
> @@ -491,6 +487,23 @@ static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
>   	return tdata;
>   }
>   
> +static struct temp_data *get_tdata(struct platform_data *pdata, int cpu)
> +{
> +	struct temp_data *tdata;
> +
> +	mutex_lock(&pdata->core_data_lock);
> +	list_for_each_entry(tdata, &pdata->core_data_list, node) {
> +		if (cpu >= 0 && !tdata->is_pkg_data && tdata->cpu_core_id == topology_core_id(cpu))
> +			goto found;
> +		if (cpu < 0 && tdata->is_pkg_data)
> +			goto found;
> +	}

I really don't like this. With 128+ cores, it gets terribly expensive.

How about calculating the number of cores in the probe function and
allocating cpu_map[] and core_data[] instead ?

Thanks,
Guenter
  
Zhang, Rui Dec. 1, 2023, 5:41 p.m. UTC | #3
On Thu, 2023-11-30 at 19:06 -0800, Guenter Roeck wrote:
> On 11/27/23 05:16, Zhang Rui wrote:
> > Currently, coretemp driver only supports 128 cores per package.
> > This loses some core temperation information on systems that have
> > more
> > than 128 cores per package.
> >   [   58.685033] coretemp coretemp.0: Adding Core 128 failed
> >   [   58.692009] coretemp coretemp.0: Adding Core 129 failed
> > 
> > Fix the problem by using a per package list to maintain the per
> > core
> > temp_data instead of the fixed length pdata->core_data[] array.
> > 
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > ---
> >   drivers/hwmon/coretemp.c | 110 ++++++++++++++++++----------------
> > -----
> >   1 file changed, 52 insertions(+), 58 deletions(-)
> > 
> > diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> > index cef43fedbd58..1bb1a6e4b07b 100644
> > --- a/drivers/hwmon/coretemp.c
> > +++ b/drivers/hwmon/coretemp.c
> > @@ -39,11 +39,7 @@ static int force_tjmax;
> >   module_param_named(tjmax, force_tjmax, int, 0444);
> >   MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
> >   
> > -#define PKG_SYSFS_ATTR_NO      1       /* Sysfs attribute for
> > package temp */
> > -#define BASE_SYSFS_ATTR_NO     2       /* Sysfs Base attr no for
> > coretemp */
> > -#define NUM_REAL_CORES         128     /* Number of Real cores per
> > cpu */
> >   #define CORETEMP_NAME_LENGTH  28      /* String Length of attrs
> > */
> > -#define MAX_CORE_DATA          (NUM_REAL_CORES +
> > BASE_SYSFS_ATTR_NO)
> >   
> >   enum coretemp_attr_index {
> >         ATTR_LABEL,
> > @@ -90,17 +86,17 @@ struct temp_data {
> >         struct attribute *attrs[TOTAL_ATTRS + 1];
> >         struct attribute_group attr_group;
> >         struct mutex update_lock;
> > +       struct list_head node;
> >   };
> >   
> >   /* Platform Data per Physical CPU */
> >   struct platform_data {
> >         struct device           *hwmon_dev;
> >         u16                     pkg_id;
> > -       u16                     cpu_map[NUM_REAL_CORES];
> > -       struct ida              ida;
> >         struct cpumask          cpumask;
> > -       struct temp_data        *core_data[MAX_CORE_DATA];
> >         struct device_attribute name_attr;
> > +       struct mutex            core_data_lock;
> > +       struct list_head        core_data_list;
> >   };
> >   
> >   struct tjmax_pci {
> > @@ -491,6 +487,23 @@ static struct temp_data
> > *init_temp_data(unsigned int cpu, int pkg_flag)
> >         return tdata;
> >   }
> >   
> > +static struct temp_data *get_tdata(struct platform_data *pdata,
> > int cpu)
> > +{
> > +       struct temp_data *tdata;
> > +
> > +       mutex_lock(&pdata->core_data_lock);
> > +       list_for_each_entry(tdata, &pdata->core_data_list, node) {
> > +               if (cpu >= 0 && !tdata->is_pkg_data && tdata-
> > >cpu_core_id == topology_core_id(cpu))
> > +                       goto found;
> > +               if (cpu < 0 && tdata->is_pkg_data)
> > +                       goto found;
> > +       }
> 
> I really don't like this. With 128+ cores, it gets terribly
> expensive.

I think this is only invoked in the cpu online/offline code, which is
not the hot path.

> 
> How about calculating the number of cores in the probe function and
> allocating cpu_map[] and core_data[] instead ?

Problem is that the number of cores is not available due to some
limitations in current CPU topology code.

Thomas has some patch series to fix this but that has not been merged
yet and we don't know when.

A simpler workaround for this issue is to change NUM_REAL_CORES to a
bigger value, and do dynamic memory allocation first. And we can change
the code to use the real number of cores later if that information
becomes available.

thanks,
rui
  
Zhang, Rui Dec. 1, 2023, 5:47 p.m. UTC | #4
On Thu, 2023-11-30 at 18:08 -0800, Ashok Raj wrote:
> On Mon, Nov 27, 2023 at 09:16:51PM +0800, Zhang Rui wrote:
> > Currently, coretemp driver only supports 128 cores per package.
> > This loses some core temperation information on systems that have
> > more
> 
> s/temperation/temperature
> 
> > than 128 cores per package.
> >  [   58.685033] coretemp coretemp.0: Adding Core 128 failed
> >  [   58.692009] coretemp coretemp.0: Adding Core 129 failed
> > 
> > Fix the problem by using a per package list to maintain the per
> > core
> > temp_data instead of the fixed length pdata->core_data[] array.
> > 
> > Signed-off-by: Zhang Rui <rui.zhang@intel.com>
> > ---
> >  drivers/hwmon/coretemp.c | 110 ++++++++++++++++++-----------------
> > ----
> >  1 file changed, 52 insertions(+), 58 deletions(-)
> > 
> > diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
> > index cef43fedbd58..1bb1a6e4b07b 100644
> > --- a/drivers/hwmon/coretemp.c
> > +++ b/drivers/hwmon/coretemp.c
> > @@ -39,11 +39,7 @@ static int force_tjmax;
> >  module_param_named(tjmax, force_tjmax, int, 0444);
> >  MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
> >  
> > -#define PKG_SYSFS_ATTR_NO      1       /* Sysfs attribute for
> > package temp */
> > -#define BASE_SYSFS_ATTR_NO     2       /* Sysfs Base attr no for
> > coretemp */
> > -#define NUM_REAL_CORES         128     /* Number of Real cores per
> > cpu */
> >  #define CORETEMP_NAME_LENGTH   28      /* String Length of attrs
> > */
> > -#define MAX_CORE_DATA          (NUM_REAL_CORES +
> > BASE_SYSFS_ATTR_NO)
> >  
> >  enum coretemp_attr_index {
> >         ATTR_LABEL,
> > @@ -90,17 +86,17 @@ struct temp_data {
> >         struct attribute *attrs[TOTAL_ATTRS + 1];
> >         struct attribute_group attr_group;
> >         struct mutex update_lock;
> > +       struct list_head node;
> >  };
> >  
> >  /* Platform Data per Physical CPU */
> >  struct platform_data {
> >         struct device           *hwmon_dev;
> >         u16                     pkg_id;
> > -       u16                     cpu_map[NUM_REAL_CORES];
> > -       struct ida              ida;
> >         struct cpumask          cpumask;
> > -       struct temp_data        *core_data[MAX_CORE_DATA];
> >         struct device_attribute name_attr;
> > +       struct mutex            core_data_lock;
> > +       struct list_head        core_data_list;
> >  };
> >  
> >  struct tjmax_pci {
> > @@ -491,6 +487,23 @@ static struct temp_data
> > *init_temp_data(unsigned int cpu, int pkg_flag)
> >         return tdata;
> >  }
> >  
> > +static struct temp_data *get_tdata(struct platform_data *pdata,
> > int cpu)
> > +{
> > +       struct temp_data *tdata;
> > +
> > +       mutex_lock(&pdata->core_data_lock);
> > +       list_for_each_entry(tdata, &pdata->core_data_list, node) {
> > +               if (cpu >= 0 && !tdata->is_pkg_data && tdata-
> > >cpu_core_id == topology_core_id(cpu))
> > +                       goto found;
> > +               if (cpu < 0 && tdata->is_pkg_data)
> > +                       goto found;
> > +       }
> > +       tdata = NULL;
> 
> What used to be an array, is now a list? Is it possible to get the
> number
> of cores_per_package during initialization and allocate the per-core?
> You
> can still get them indexing from core_id and you can possibly lose
> the
> mutex and search?
> 
> I don't know this code well enough... Just a thought.

yeah, sadly cores_per_package is not available for now as I mentioned
in the other email.

> 
> > +found:
> > +       mutex_unlock(&pdata->core_data_lock);
> > +       return tdata;
> > +}
> > +
> >  static int create_core_data(struct platform_device *pdev, unsigned
> > int cpu,
> >                             int pkg_flag)
> >  {
> > @@ -498,37 +511,29 @@ static int create_core_data(struct
> > platform_device *pdev, unsigned int cpu,
> >         struct platform_data *pdata = platform_get_drvdata(pdev);
> >         struct cpuinfo_x86 *c = &cpu_data(cpu);
> >         u32 eax, edx;
> > -       int err, index, attr_no;
> > +       int err, attr_no;
> >  
> >         if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
> >                 return 0;
> >  
> > +       tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
> > +       if (tdata)
> > +               return -EEXIST;
> > +
> > +       tdata = init_temp_data(cpu, pkg_flag);
> 
> Is temp_data per_cpu or per_core?

it is per_core.

>  Wasn't sure if temp_data needs a CPU
> number there along with cpu_core_id

CPU number is needed to access the core temperature MSRs.

thanks,
rui

> 
> 
> > +       if (!tdata)
> > +               return -ENOMEM;
> > +
> >         /*
> >          * Find attr number for sysfs:
> >          * We map the attr number to core id of the CPU
> >          * The attr number is always core id + 2
> >          * The Pkgtemp will always show up as temp1_*, if available
> >          */
> > -       if (pkg_flag) {
> > -               attr_no = PKG_SYSFS_ATTR_NO;
> > -       } else {
> > -               index = ida_alloc(&pdata->ida, GFP_KERNEL);
> > -               if (index < 0)
> > -                       return index;
> > -               pdata->cpu_map[index] = topology_core_id(cpu);
> > -               attr_no = index + BASE_SYSFS_ATTR_NO;
> > -       }
> > -
> > -       if (attr_no > MAX_CORE_DATA - 1) {
> > -               err = -ERANGE;
> > -               goto ida_free;
> > -       }
> > -
> > -       tdata = init_temp_data(cpu, pkg_flag);
> > -       if (!tdata) {
> > -               err = -ENOMEM;
> > -               goto ida_free;
> > -       }
> > +       if (pkg_flag)
> > +               attr_no = 1;
> > +       else
> > +               attr_no = tdata->cpu_core_id + 2;
> >  
> >         /* Test if we can access the status register */
> >         err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax,
> > &edx);
> > @@ -547,20 +552,18 @@ static int create_core_data(struct
> > platform_device *pdev, unsigned int cpu,
> >                 if (get_ttarget(tdata, &pdev->dev) >= 0)
> >                         tdata->attr_size++;
> >  
> > -       pdata->core_data[attr_no] = tdata;
> > -
> >         /* Create sysfs interfaces */
> >         err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
> >         if (err)
> >                 goto exit_free;
> >  
> > +       mutex_lock(&pdata->core_data_lock);
> > +       list_add(&tdata->node, &pdata->core_data_list);
> > +       mutex_unlock(&pdata->core_data_lock);
> > +
> >         return 0;
> >  exit_free:
> > -       pdata->core_data[attr_no] = NULL;
> >         kfree(tdata);
> > -ida_free:
> > -       if (!pkg_flag)
> > -               ida_free(&pdata->ida, index);
> >         return err;
> >  }
> >  
> > @@ -571,9 +574,9 @@ coretemp_add_core(struct platform_device *pdev,
> > unsigned int cpu, int pkg_flag)
> >                 dev_err(&pdev->dev, "Adding Core %u failed\n",
> > cpu);
> >  }
> >  
> > -static void coretemp_remove_core(struct platform_data *pdata, int
> > indx)
> > +static void coretemp_remove_core(struct platform_data *pdata, int
> > cpu)
> >  {
> > -       struct temp_data *tdata = pdata->core_data[indx];
> > +       struct temp_data *tdata = get_tdata(pdata, cpu);
> >  
> >         /* if we errored on add then this is already gone */
> >         if (!tdata)
> > @@ -582,11 +585,11 @@ static void coretemp_remove_core(struct
> > platform_data *pdata, int indx)
> >         /* Remove the sysfs attributes */
> >         sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata-
> > >attr_group);
> >  
> > -       kfree(pdata->core_data[indx]);
> > -       pdata->core_data[indx] = NULL;
> > +       mutex_lock(&pdata->core_data_lock);
> > +       list_del(&tdata->node);
> > +       mutex_unlock(&pdata->core_data_lock);
> >  
> > -       if (indx >= BASE_SYSFS_ATTR_NO)
> > -               ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
> > +       kfree(tdata);
> >  }
> >  
> >  static int coretemp_device_add(int zoneid)
> > @@ -601,7 +604,8 @@ static int coretemp_device_add(int zoneid)
> >                 return -ENOMEM;
> >  
> >         pdata->pkg_id = zoneid;
> > -       ida_init(&pdata->ida);
> > +       mutex_init(&pdata->core_data_lock);
> > +       INIT_LIST_HEAD(&pdata->core_data_list);
> >  
> >         pdev = platform_device_alloc(DRVNAME, zoneid);
> >         if (!pdev) {
> > @@ -629,7 +633,6 @@ static void coretemp_device_remove(int zoneid)
> >         struct platform_device *pdev = zone_devices[zoneid];
> >         struct platform_data *pdata = platform_get_drvdata(pdev);
> >  
> > -       ida_destroy(&pdata->ida);
> >         kfree(pdata);
> >         platform_device_unregister(pdev);
> >  }
> > @@ -699,7 +702,7 @@ static int coretemp_cpu_offline(unsigned int
> > cpu)
> >         struct platform_device *pdev = coretemp_get_pdev(cpu);
> >         struct platform_data *pd;
> >         struct temp_data *tdata;
> > -       int i, indx = -1, target;
> > +       int target;
> >  
> >         /* No need to tear down any interfaces for suspend */
> >         if (cpuhp_tasks_frozen)
> > @@ -710,19 +713,10 @@ static int coretemp_cpu_offline(unsigned int
> > cpu)
> >         if (!pd->hwmon_dev)
> >                 return 0;
> >  
> > -       for (i = 0; i < NUM_REAL_CORES; i++) {
> > -               if (pd->cpu_map[i] == topology_core_id(cpu)) {
> > -                       indx = i + BASE_SYSFS_ATTR_NO;
> > -                       break;
> > -               }
> > -       }
> > -
> > -       /* Too many cores and this core is not populated, just
> > return */
> > -       if (indx < 0)
> > +       tdata = get_tdata(pd, cpu);
> > +       if (!tdata)
> >                 return 0;
> >  
> > -       tdata = pd->core_data[indx];
> > -
> >         cpumask_clear_cpu(cpu, &pd->cpumask);
> >  
> >         /*
> > @@ -732,20 +726,20 @@ static int coretemp_cpu_offline(unsigned int
> > cpu)
> >          */
> >         target = cpumask_any_and(&pd->cpumask,
> > topology_sibling_cpumask(cpu));
> >         if (target >= nr_cpu_ids) {
> > -               coretemp_remove_core(pd, indx);
> > -       } else if (tdata && tdata->cpu == cpu) {
> > +               coretemp_remove_core(pd, cpu);
> > +       } else if (tdata->cpu == cpu) {
> >                 mutex_lock(&tdata->update_lock);
> >                 tdata->cpu = target;
> >                 mutex_unlock(&tdata->update_lock);
> >         }
> >  
> > +       tdata = get_tdata(pd, -1);
> >         /*
> >          * If all cores in this pkg are offline, remove the
> > interface.
> >          */
> > -       tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
> >         if (cpumask_empty(&pd->cpumask)) {
> >                 if (tdata)
> > -                       coretemp_remove_core(pd,
> > PKG_SYSFS_ATTR_NO);
> > +                       coretemp_remove_core(pd, -1);
> >                 hwmon_device_unregister(pd->hwmon_dev);
> >                 pd->hwmon_dev = NULL;
> >                 return 0;
> > -- 
> > 2.34.1
> >
  
Ashok Raj Dec. 1, 2023, 10:58 p.m. UTC | #5
On Fri, Dec 01, 2023 at 09:47:30AM -0800, Zhang, Rui wrote:
> On Thu, 2023-11-30 at 18:08 -0800, Ashok Raj wrote:
> > On Mon, Nov 27, 2023 at 09:16:51PM +0800, Zhang Rui wrote:
> > >  
> > > +static struct temp_data *get_tdata(struct platform_data *pdata,
> > > int cpu)
> > > +{
> > > +       struct temp_data *tdata;
> > > +
> > > +       mutex_lock(&pdata->core_data_lock);
> > > +       list_for_each_entry(tdata, &pdata->core_data_list, node) {
> > > +               if (cpu >= 0 && !tdata->is_pkg_data && tdata-
> > > >cpu_core_id == topology_core_id(cpu))
> > > +                       goto found;
> > > +               if (cpu < 0 && tdata->is_pkg_data)
> > > +                       goto found;
> > > +       }
> > > +       tdata = NULL;
> > 
> > What used to be an array, is now a list? Is it possible to get the
> > number
> > of cores_per_package during initialization and allocate the per-core?
> > You
> > can still get them indexing from core_id and you can possibly lose
> > the
> > mutex and search?
> > 
> > I don't know this code well enough... Just a thought.
> 
> yeah, sadly cores_per_package is not available for now as I mentioned
> in the other email.

Couldn't we reuse the logic from Thomas's topology work that gives this
upfront based on 0x1f?

> 
> > 
> > > +found:
> > > +       mutex_unlock(&pdata->core_data_lock);
> > > +       return tdata;
> > > +}
> > > +
> > >  static int create_core_data(struct platform_device *pdev, unsigned
> > > int cpu,
> > >                             int pkg_flag)
> > >  {
> > > @@ -498,37 +511,29 @@ static int create_core_data(struct
> > > platform_device *pdev, unsigned int cpu,
> > >         struct platform_data *pdata = platform_get_drvdata(pdev);
> > >         struct cpuinfo_x86 *c = &cpu_data(cpu);
> > >         u32 eax, edx;
> > > -       int err, index, attr_no;
> > > +       int err, attr_no;
> > >  
> > >         if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
> > >                 return 0;
> > >  
> > > +       tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
> > > +       if (tdata)
> > > +               return -EEXIST;
> > > +
> > > +       tdata = init_temp_data(cpu, pkg_flag);
> > 
> > Is temp_data per_cpu or per_core?
> 
> it is per_core.
> 
> >  Wasn't sure if temp_data needs a CPU
> > number there along with cpu_core_id
> 
> CPU number is needed to access the core temperature MSRs.

What if that cpu is currently offline? maybe you can simply search
for_each_online_cpu() and match core_id from cpuinfo_x86?
  
Zhang, Rui Dec. 2, 2023, 7:22 a.m. UTC | #6
On Fri, 2023-12-01 at 14:58 -0800, Ashok Raj wrote:
> On Fri, Dec 01, 2023 at 09:47:30AM -0800, Zhang, Rui wrote:
> > On Thu, 2023-11-30 at 18:08 -0800, Ashok Raj wrote:
> > > On Mon, Nov 27, 2023 at 09:16:51PM +0800, Zhang Rui wrote:
> > > >  
> > > > +static struct temp_data *get_tdata(struct platform_data
> > > > *pdata,
> > > > int cpu)
> > > > +{
> > > > +       struct temp_data *tdata;
> > > > +
> > > > +       mutex_lock(&pdata->core_data_lock);
> > > > +       list_for_each_entry(tdata, &pdata->core_data_list,
> > > > node) {
> > > > +               if (cpu >= 0 && !tdata->is_pkg_data && tdata-
> > > > > cpu_core_id == topology_core_id(cpu))
> > > > +                       goto found;
> > > > +               if (cpu < 0 && tdata->is_pkg_data)
> > > > +                       goto found;
> > > > +       }
> > > > +       tdata = NULL;
> > > 
> > > What used to be an array, is now a list? Is it possible to get
> > > the
> > > number
> > > of cores_per_package during initialization and allocate the per-
> > > core?
> > > You
> > > can still get them indexing from core_id and you can possibly
> > > lose
> > > the
> > > mutex and search?
> > > 
> > > I don't know this code well enough... Just a thought.
> > 
> > yeah, sadly cores_per_package is not available for now as I
> > mentioned
> > in the other email.
> 
> Couldn't we reuse the logic from Thomas's topology work that gives
> this
> upfront based on 0x1f?

this sounds duplicate work to me. To me, there is no such an urgent
requirement that is worth this whole effort.

> 
> > 
> > > 
> > > > +found:
> > > > +       mutex_unlock(&pdata->core_data_lock);
> > > > +       return tdata;
> > > > +}
> > > > +
> > > >  static int create_core_data(struct platform_device *pdev,
> > > > unsigned
> > > > int cpu,
> > > >                             int pkg_flag)
> > > >  {
> > > > @@ -498,37 +511,29 @@ static int create_core_data(struct
> > > > platform_device *pdev, unsigned int cpu,
> > > >         struct platform_data *pdata =
> > > > platform_get_drvdata(pdev);
> > > >         struct cpuinfo_x86 *c = &cpu_data(cpu);
> > > >         u32 eax, edx;
> > > > -       int err, index, attr_no;
> > > > +       int err, attr_no;
> > > >  
> > > >         if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
> > > >                 return 0;
> > > >  
> > > > +       tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
> > > > +       if (tdata)
> > > > +               return -EEXIST;
> > > > +
> > > > +       tdata = init_temp_data(cpu, pkg_flag);
> > > 
> > > Is temp_data per_cpu or per_core?
> > 
> > it is per_core.
> > 
> > >  Wasn't sure if temp_data needs a CPU
> > > number there along with cpu_core_id
> > 
> > CPU number is needed to access the core temperature MSRs.
> 
> What if that cpu is currently offline? maybe you can simply search
> for_each_online_cpu() and match core_id from cpuinfo_x86?
> 
This is already handled in the cpu online/offline callbacks.
Each temp_data is always associated with an online CPU that can be used
to update the core temperature info.

thanks,
rui
  

Patch

diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index cef43fedbd58..1bb1a6e4b07b 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -39,11 +39,7 @@  static int force_tjmax;
 module_param_named(tjmax, force_tjmax, int, 0444);
 MODULE_PARM_DESC(tjmax, "TjMax value in degrees Celsius");
 
-#define PKG_SYSFS_ATTR_NO	1	/* Sysfs attribute for package temp */
-#define BASE_SYSFS_ATTR_NO	2	/* Sysfs Base attr no for coretemp */
-#define NUM_REAL_CORES		128	/* Number of Real cores per cpu */
 #define CORETEMP_NAME_LENGTH	28	/* String Length of attrs */
-#define MAX_CORE_DATA		(NUM_REAL_CORES + BASE_SYSFS_ATTR_NO)
 
 enum coretemp_attr_index {
 	ATTR_LABEL,
@@ -90,17 +86,17 @@  struct temp_data {
 	struct attribute *attrs[TOTAL_ATTRS + 1];
 	struct attribute_group attr_group;
 	struct mutex update_lock;
+	struct list_head node;
 };
 
 /* Platform Data per Physical CPU */
 struct platform_data {
 	struct device		*hwmon_dev;
 	u16			pkg_id;
-	u16			cpu_map[NUM_REAL_CORES];
-	struct ida		ida;
 	struct cpumask		cpumask;
-	struct temp_data	*core_data[MAX_CORE_DATA];
 	struct device_attribute name_attr;
+	struct mutex		core_data_lock;
+	struct list_head	core_data_list;
 };
 
 struct tjmax_pci {
@@ -491,6 +487,23 @@  static struct temp_data *init_temp_data(unsigned int cpu, int pkg_flag)
 	return tdata;
 }
 
+static struct temp_data *get_tdata(struct platform_data *pdata, int cpu)
+{
+	struct temp_data *tdata;
+
+	mutex_lock(&pdata->core_data_lock);
+	list_for_each_entry(tdata, &pdata->core_data_list, node) {
+		if (cpu >= 0 && !tdata->is_pkg_data && tdata->cpu_core_id == topology_core_id(cpu))
+			goto found;
+		if (cpu < 0 && tdata->is_pkg_data)
+			goto found;
+	}
+	tdata = NULL;
+found:
+	mutex_unlock(&pdata->core_data_lock);
+	return tdata;
+}
+
 static int create_core_data(struct platform_device *pdev, unsigned int cpu,
 			    int pkg_flag)
 {
@@ -498,37 +511,29 @@  static int create_core_data(struct platform_device *pdev, unsigned int cpu,
 	struct platform_data *pdata = platform_get_drvdata(pdev);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	u32 eax, edx;
-	int err, index, attr_no;
+	int err, attr_no;
 
 	if (!housekeeping_cpu(cpu, HK_TYPE_MISC))
 		return 0;
 
+	tdata = get_tdata(pdata, pkg_flag ? -1 : cpu);
+	if (tdata)
+		return -EEXIST;
+
+	tdata = init_temp_data(cpu, pkg_flag);
+	if (!tdata)
+		return -ENOMEM;
+
 	/*
 	 * Find attr number for sysfs:
 	 * We map the attr number to core id of the CPU
 	 * The attr number is always core id + 2
 	 * The Pkgtemp will always show up as temp1_*, if available
 	 */
-	if (pkg_flag) {
-		attr_no = PKG_SYSFS_ATTR_NO;
-	} else {
-		index = ida_alloc(&pdata->ida, GFP_KERNEL);
-		if (index < 0)
-			return index;
-		pdata->cpu_map[index] = topology_core_id(cpu);
-		attr_no = index + BASE_SYSFS_ATTR_NO;
-	}
-
-	if (attr_no > MAX_CORE_DATA - 1) {
-		err = -ERANGE;
-		goto ida_free;
-	}
-
-	tdata = init_temp_data(cpu, pkg_flag);
-	if (!tdata) {
-		err = -ENOMEM;
-		goto ida_free;
-	}
+	if (pkg_flag)
+		attr_no = 1;
+	else
+		attr_no = tdata->cpu_core_id + 2;
 
 	/* Test if we can access the status register */
 	err = rdmsr_safe_on_cpu(cpu, tdata->status_reg, &eax, &edx);
@@ -547,20 +552,18 @@  static int create_core_data(struct platform_device *pdev, unsigned int cpu,
 		if (get_ttarget(tdata, &pdev->dev) >= 0)
 			tdata->attr_size++;
 
-	pdata->core_data[attr_no] = tdata;
-
 	/* Create sysfs interfaces */
 	err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no);
 	if (err)
 		goto exit_free;
 
+	mutex_lock(&pdata->core_data_lock);
+	list_add(&tdata->node, &pdata->core_data_list);
+	mutex_unlock(&pdata->core_data_lock);
+
 	return 0;
 exit_free:
-	pdata->core_data[attr_no] = NULL;
 	kfree(tdata);
-ida_free:
-	if (!pkg_flag)
-		ida_free(&pdata->ida, index);
 	return err;
 }
 
@@ -571,9 +574,9 @@  coretemp_add_core(struct platform_device *pdev, unsigned int cpu, int pkg_flag)
 		dev_err(&pdev->dev, "Adding Core %u failed\n", cpu);
 }
 
-static void coretemp_remove_core(struct platform_data *pdata, int indx)
+static void coretemp_remove_core(struct platform_data *pdata, int cpu)
 {
-	struct temp_data *tdata = pdata->core_data[indx];
+	struct temp_data *tdata = get_tdata(pdata, cpu);
 
 	/* if we errored on add then this is already gone */
 	if (!tdata)
@@ -582,11 +585,11 @@  static void coretemp_remove_core(struct platform_data *pdata, int indx)
 	/* Remove the sysfs attributes */
 	sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group);
 
-	kfree(pdata->core_data[indx]);
-	pdata->core_data[indx] = NULL;
+	mutex_lock(&pdata->core_data_lock);
+	list_del(&tdata->node);
+	mutex_unlock(&pdata->core_data_lock);
 
-	if (indx >= BASE_SYSFS_ATTR_NO)
-		ida_free(&pdata->ida, indx - BASE_SYSFS_ATTR_NO);
+	kfree(tdata);
 }
 
 static int coretemp_device_add(int zoneid)
@@ -601,7 +604,8 @@  static int coretemp_device_add(int zoneid)
 		return -ENOMEM;
 
 	pdata->pkg_id = zoneid;
-	ida_init(&pdata->ida);
+	mutex_init(&pdata->core_data_lock);
+	INIT_LIST_HEAD(&pdata->core_data_list);
 
 	pdev = platform_device_alloc(DRVNAME, zoneid);
 	if (!pdev) {
@@ -629,7 +633,6 @@  static void coretemp_device_remove(int zoneid)
 	struct platform_device *pdev = zone_devices[zoneid];
 	struct platform_data *pdata = platform_get_drvdata(pdev);
 
-	ida_destroy(&pdata->ida);
 	kfree(pdata);
 	platform_device_unregister(pdev);
 }
@@ -699,7 +702,7 @@  static int coretemp_cpu_offline(unsigned int cpu)
 	struct platform_device *pdev = coretemp_get_pdev(cpu);
 	struct platform_data *pd;
 	struct temp_data *tdata;
-	int i, indx = -1, target;
+	int target;
 
 	/* No need to tear down any interfaces for suspend */
 	if (cpuhp_tasks_frozen)
@@ -710,19 +713,10 @@  static int coretemp_cpu_offline(unsigned int cpu)
 	if (!pd->hwmon_dev)
 		return 0;
 
-	for (i = 0; i < NUM_REAL_CORES; i++) {
-		if (pd->cpu_map[i] == topology_core_id(cpu)) {
-			indx = i + BASE_SYSFS_ATTR_NO;
-			break;
-		}
-	}
-
-	/* Too many cores and this core is not populated, just return */
-	if (indx < 0)
+	tdata = get_tdata(pd, cpu);
+	if (!tdata)
 		return 0;
 
-	tdata = pd->core_data[indx];
-
 	cpumask_clear_cpu(cpu, &pd->cpumask);
 
 	/*
@@ -732,20 +726,20 @@  static int coretemp_cpu_offline(unsigned int cpu)
 	 */
 	target = cpumask_any_and(&pd->cpumask, topology_sibling_cpumask(cpu));
 	if (target >= nr_cpu_ids) {
-		coretemp_remove_core(pd, indx);
-	} else if (tdata && tdata->cpu == cpu) {
+		coretemp_remove_core(pd, cpu);
+	} else if (tdata->cpu == cpu) {
 		mutex_lock(&tdata->update_lock);
 		tdata->cpu = target;
 		mutex_unlock(&tdata->update_lock);
 	}
 
+	tdata = get_tdata(pd, -1);
 	/*
 	 * If all cores in this pkg are offline, remove the interface.
 	 */
-	tdata = pd->core_data[PKG_SYSFS_ATTR_NO];
 	if (cpumask_empty(&pd->cpumask)) {
 		if (tdata)
-			coretemp_remove_core(pd, PKG_SYSFS_ATTR_NO);
+			coretemp_remove_core(pd, -1);
 		hwmon_device_unregister(pd->hwmon_dev);
 		pd->hwmon_dev = NULL;
 		return 0;