[v3,4/6] cxl/region: Add trigger_poison_list sysfs attribute

Message ID a696d91e34fc845673345a6b024545df849a8fef.1668115235.git.alison.schofield@intel.com
State New
Headers
Series CXL Poison List Retrieval & Tracing

Commit Message

Alison Schofield Nov. 11, 2022, 3:12 a.m. UTC
  From: Alison Schofield <alison.schofield@intel.com>

When a boolean 'true' is written to this attribute, the region driver
retrieves the poison list for the capacity each device contributes
to this region. The list consists of addresses that are poisoned, or
would result in poison if accessed, and the source of the poison.
The retrieved errors are logged as kernel trace events with the
label 'cxl_poison'.

Devices not supporting the poison list capability are ignored.

Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 Documentation/ABI/testing/sysfs-bus-cxl | 14 +++++++++++
 drivers/cxl/core/region.c               | 33 +++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
  

Comments

Jonathan Cameron Nov. 16, 2022, 12:50 p.m. UTC | #1
On Thu, 10 Nov 2022 19:12:42 -0800
alison.schofield@intel.com wrote:

> From: Alison Schofield <alison.schofield@intel.com>
> 
> When a boolean 'true' is written to this attribute the region driver
> retrieves the poison list for the capacity each device contributes
> to this region. The list includes addresses that are poisoned, or
> would result in poison if accessed, and the source of the poison.
> The retrieved errors are logged as kernel trace events with the
> label 'cxl_poison'.
> 
> Devices not supporting the poison list capability are ignored.
> 
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
Trivial comment inline you might want to consider.

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> ---
>  Documentation/ABI/testing/sysfs-bus-cxl | 14 +++++++++++
>  drivers/cxl/core/region.c               | 33 +++++++++++++++++++++++++
>  2 files changed, 47 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> index 1c5f4a853ba2..54fad3bdcb2b 100644
> --- a/Documentation/ABI/testing/sysfs-bus-cxl
> +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> @@ -402,3 +402,17 @@ Description:
>  		attribute is only visible for devices supporting the
>  		capability. The retrieved errors are logged as kernel
>  		trace events with the label 'cxl_poison'.
> +
> +
> +What:		/sys/bus/cxl/devices/regionZ/trigger_poison_list
> +Date:		November, 2022
> +KernelVersion:	v6.2
> +Contact:	linux-cxl@vger.kernel.org
> +Description:
> +		(WO) When a boolean 'true' is written to this attribute the
> +		region driver retrieves the poison list for the capacity
> +		each device contributes to this region. The list includes
Trivial: Same as in previous patch. "includes" is too vague.

> +		addresses that are poisoned, or would result in poison if
> +		accessed, and the source of the poison. The retrieved
> +		errors are logged as kernel trace events with the label
> +		'cxl_poison'.
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index f9ae5ad284ff..68821238491e 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -72,6 +72,38 @@ static int is_dup(struct device *match, void *data)
>  	return 0;
>  }
>  
> +static ssize_t trigger_poison_list_store(struct device *dev,
> +					 struct device_attribute *attr,
> +					 const char *buf, size_t len)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	struct cxl_region_params *p = &cxlr->params;
> +	struct cxl_endpoint_decoder *cxled;
> +	struct cxl_memdev *cxlmd;
> +	u64 offset, length;
> +	int rc, i;
> +	bool tmp;
> +
> +	if (kstrtobool(buf, &tmp))
> +		return -EINVAL;
> +
> +	for (i = 0; i <  p->nr_targets; i++) {
> +		cxled = p->targets[i];
> +		cxlmd = cxled_to_memdev(cxled);
> +		if (!test_bit(CXL_MEM_COMMAND_ID_GET_POISON,
> +			      cxlmd->cxlds->enabled_cmds))
> +			continue;
> +
> +		offset = cxl_dpa_resource_start(cxled);
> +		length = cxl_dpa_size(cxled);
> +		rc = cxl_mem_get_poison(cxlmd, offset, length, cxlr);
> +		if (rc)
> +			return rc;
> +	}
> +	return len;
> +}
> +static DEVICE_ATTR_WO(trigger_poison_list);
> +
>  static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
>  			  const char *buf, size_t len)
>  {
> @@ -570,6 +602,7 @@ static struct attribute *cxl_region_attrs[] = {
>  	&dev_attr_interleave_granularity.attr,
>  	&dev_attr_resource.attr,
>  	&dev_attr_size.attr,
> +	&dev_attr_trigger_poison_list.attr,
>  	NULL,
>  };
>
  
Alison Schofield Nov. 18, 2022, 12:24 a.m. UTC | #2
On Wed, Nov 16, 2022 at 12:50:38PM +0000, Jonathan Cameron wrote:
> On Thu, 10 Nov 2022 19:12:42 -0800
> alison.schofield@intel.com wrote:
> 
> > From: Alison Schofield <alison.schofield@intel.com>
> > 
> > When a boolean 'true' is written to this attribute the region driver
> > retrieves the poison list for the capacity each device contributes
> > to this region. The list includes addresses that are poisoned, or
> > would result in poison if accessed, and the source of the poison.
> > The retrieved errors are logged as kernel trace events with the
> > label 'cxl_poison'.
> > 
> > Devices not supporting the poison list capability are ignored.
> > 
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> Trivial comment inline you might want to consider.

Thanks, got it!  I will 'include' your tag going forward.
> 
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> 
> > ---
> >  Documentation/ABI/testing/sysfs-bus-cxl | 14 +++++++++++
> >  drivers/cxl/core/region.c               | 33 +++++++++++++++++++++++++
> >  2 files changed, 47 insertions(+)
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> > index 1c5f4a853ba2..54fad3bdcb2b 100644
> > --- a/Documentation/ABI/testing/sysfs-bus-cxl
> > +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> > @@ -402,3 +402,17 @@ Description:
> >  		attribute is only visible for devices supporting the
> >  		capability. The retrieved errors are logged as kernel
> >  		trace events with the label 'cxl_poison'.
> > +
> > +
> > +What:		/sys/bus/cxl/devices/regionZ/trigger_poison_list
> > +Date:		November, 2022
> > +KernelVersion:	v6.2
> > +Contact:	linux-cxl@vger.kernel.org
> > +Description:
> > +		(WO) When a boolean 'true' is written to this attribute the
> > +		region driver retrieves the poison list for the capacity
> > +		each device contributes to this region. The list includes
> Trivial: Same as in previous patch. "includes" is too vague.
> 
> > +		addresses that are poisoned, or would result in poison if
> > +		accessed, and the source of the poison. The retrieved
> > +		errors are logged as kernel trace events with the label
> > +		'cxl_poison'.
> > diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> > index f9ae5ad284ff..68821238491e 100644
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -72,6 +72,38 @@ static int is_dup(struct device *match, void *data)
> >  	return 0;
> >  }
> >  
> > +static ssize_t trigger_poison_list_store(struct device *dev,
> > +					 struct device_attribute *attr,
> > +					 const char *buf, size_t len)
> > +{
> > +	struct cxl_region *cxlr = to_cxl_region(dev);
> > +	struct cxl_region_params *p = &cxlr->params;
> > +	struct cxl_endpoint_decoder *cxled;
> > +	struct cxl_memdev *cxlmd;
> > +	u64 offset, length;
> > +	int rc, i;
> > +	bool tmp;
> > +
> > +	if (kstrtobool(buf, &tmp))
> > +		return -EINVAL;
> > +
> > +	for (i = 0; i <  p->nr_targets; i++) {
> > +		cxled = p->targets[i];
> > +		cxlmd = cxled_to_memdev(cxled);
> > +		if (!test_bit(CXL_MEM_COMMAND_ID_GET_POISON,
> > +			      cxlmd->cxlds->enabled_cmds))
> > +			continue;
> > +
> > +		offset = cxl_dpa_resource_start(cxled);
> > +		length = cxl_dpa_size(cxled);
> > +		rc = cxl_mem_get_poison(cxlmd, offset, length, cxlr);
> > +		if (rc)
> > +			return rc;
> > +	}
> > +	return len;
> > +}
> > +static DEVICE_ATTR_WO(trigger_poison_list);
> > +
> >  static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
> >  			  const char *buf, size_t len)
> >  {
> > @@ -570,6 +602,7 @@ static struct attribute *cxl_region_attrs[] = {
> >  	&dev_attr_interleave_granularity.attr,
> >  	&dev_attr_resource.attr,
> >  	&dev_attr_size.attr,
> > +	&dev_attr_trigger_poison_list.attr,
> >  	NULL,
> >  };
> >  
>
  

Patch

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 1c5f4a853ba2..54fad3bdcb2b 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -402,3 +402,17 @@  Description:
 		attribute is only visible for devices supporting the
 		capability. The retrieved errors are logged as kernel
 		trace events with the label 'cxl_poison'.
+
+
+What:		/sys/bus/cxl/devices/regionZ/trigger_poison_list
+Date:		November, 2022
+KernelVersion:	v6.2
+Contact:	linux-cxl@vger.kernel.org
+Description:
+		(WO) When a boolean 'true' is written to this attribute, the
+		region driver retrieves the poison list for the capacity
+		each device contributes to this region. The list consists of
+		addresses that are poisoned, or would result in poison if
+		accessed, and the source of the poison. The retrieved
+		errors are logged as kernel trace events with the label
+		'cxl_poison'.
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index f9ae5ad284ff..68821238491e 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -72,6 +72,57 @@  static int is_dup(struct device *match, void *data)
 	return 0;
 }
 
+/*
+ * Write handler for the write-only 'trigger_poison_list' region attribute.
+ *
+ * Writing a boolean 'true' retrieves the poison list for the DPA range
+ * that each endpoint decoder contributes to this region. Memdevs that do
+ * not have the Get Poison List command enabled are skipped. Any other
+ * input, including a valid boolean 'false', is rejected with -EINVAL so
+ * the behavior matches the ABI documentation.
+ *
+ * Returns @len on success, -EINVAL on bad input, the error from an
+ * interrupted lock acquisition, or the first cxl_mem_get_poison() failure.
+ */
+static ssize_t trigger_poison_list_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t len)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	struct cxl_region_params *p = &cxlr->params;
+	struct cxl_endpoint_decoder *cxled;
+	struct cxl_memdev *cxlmd;
+	u64 offset, length;
+	bool trigger;
+	int rc, i;
+
+	if (kstrtobool(buf, &trigger) || !trigger)
+		return -EINVAL;
+
+	/* Hold the region lock for read so the target list cannot change mid-walk */
+	rc = down_read_interruptible(&cxl_region_rwsem);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < p->nr_targets; i++) {
+		cxled = p->targets[i];
+		cxlmd = cxled_to_memdev(cxled);
+		if (!test_bit(CXL_MEM_COMMAND_ID_GET_POISON,
+			      cxlmd->cxlds->enabled_cmds))
+			continue;
+
+		offset = cxl_dpa_resource_start(cxled);
+		length = cxl_dpa_size(cxled);
+		rc = cxl_mem_get_poison(cxlmd, offset, length, cxlr);
+		if (rc)
+			break;
+	}
+	up_read(&cxl_region_rwsem);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_WO(trigger_poison_list);
+
 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t len)
 {
@@ -570,6 +602,7 @@  static struct attribute *cxl_region_attrs[] = {
 	&dev_attr_interleave_granularity.attr,
 	&dev_attr_resource.attr,
 	&dev_attr_size.attr,
+	&dev_attr_trigger_poison_list.attr,
 	NULL,
 };