[2/2] misc: smpro-errmon: Add dimm training failure syndrome

Message ID 20230214064509.3622044-3-quan@os.amperecomputing.com
State New
Headers
Series Add DIMM 2x refresh event and failure syndrome |

Commit Message

Quan Nguyen Feb. 14, 2023, 6:45 a.m. UTC
  Adds event_dimm[0-15]_syndrome sysfs to report the failure syndrome
to BMC when DIMM training failed.

Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
---
 .../sysfs-bus-platform-devices-ampere-smpro   | 10 +++
 drivers/misc/smpro-errmon.c                   | 77 +++++++++++++++++++
 2 files changed, 87 insertions(+)
  

Comments

Paul Menzel Feb. 15, 2023, 7:33 a.m. UTC | #1
Dear Quan,


Thank you for your patch.

Am 14.02.23 um 07:45 schrieb Quan Nguyen:
> Adds event_dimm[0-15]_syndrome sysfs to report the failure syndrome
> to BMC when DIMM training failed.

Where you able to verify that it works? Out of curiosity, how?

> Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
> ---
>   .../sysfs-bus-platform-devices-ampere-smpro   | 10 +++
>   drivers/misc/smpro-errmon.c                   | 77 +++++++++++++++++++
>   2 files changed, 87 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
> index d4e3f308c451..c35f1d45e656 100644
> --- a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
> +++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
> @@ -265,6 +265,16 @@ Description:
>   		For more details, see section `5.7 GPI Status Registers and 5.9 Memory Error Register Definitions,
>   		Altra Family Soc BMC Interface Specification`.
>   
> +What:		/sys/bus/platform/devices/smpro-errmon.*/event_dimm[0-15]_syndrome
> +KernelVersion:	6.1

Should it be 6.2, as it probably won’t make it into 6.1?

> +Contact:	Quan Nguyen <quan@os.amperecomputing.com>
> +Description:
> +		(RO) The sysfs returns the 2-byte DIMM failure syndrome data for slot
> +		0-15 if it failed to initialized.

to initialize

> +
> +		For more details, see section `5.11 Boot Stage Register Definitions,
> +		Altra Family Soc BMC Interface Specification`.
> +
>   What:		/sys/bus/platform/devices/smpro-misc.*/boot_progress
>   KernelVersion:	6.1
>   Contact:	Quan Nguyen <quan@os.amperecomputing.com>
> diff --git a/drivers/misc/smpro-errmon.c b/drivers/misc/smpro-errmon.c
> index 1635e881aefb..3e8570cbb740 100644
> --- a/drivers/misc/smpro-errmon.c
> +++ b/drivers/misc/smpro-errmon.c
> @@ -47,6 +47,12 @@
>   #define WARN_PMPRO_INFO_LO	0xAC
>   #define WARN_PMPRO_INFO_HI	0xAD
>   
> +/* Boot Stage Register */
> +#define BOOTSTAGE		0xB0
> +#define DIMM_SYNDROME_SEL	0xB4
> +#define DIMM_SYNDROME_ERR	0xB5
> +#define DIMM_SYNDROME_STAGE	4
> +
>   /* PCIE Error Registers */
>   #define PCIE_CE_ERR_CNT		0xC0
>   #define PCIE_CE_ERR_LEN		0xC1
> @@ -468,6 +474,61 @@ EVENT_RO(vrd_hot, VRD_HOT_EVENT);
>   EVENT_RO(dimm_hot, DIMM_HOT_EVENT);
>   EVENT_RO(dimm_2x_refresh, DIMM_2X_REFRESH_EVENT);
>   
> +static ssize_t smpro_dimm_syndrome_read(struct device *dev, struct device_attribute *da,
> +					char *buf, int slot)

Could `slot` be passed as `unsigned int`?

> +{
> +	struct smpro_errmon *errmon = dev_get_drvdata(dev);
> +	s32 data;
> +	int ret;
> +
> +	ret = regmap_read(errmon->regmap, BOOTSTAGE, &data);

The function signature is:

     int regmap_read(struct regmap *map, unsigned int reg, unsigned int 
*val)

So why not use unsigned int as data type for `data`?

> +	if (ret)
> +		return ret;
> +
> +	/* check for valid stage */
> +	data = (data >> 8) & 0xff;
> +	if (data != DIMM_SYNDROME_STAGE)
> +		return ret;

Isn’t now success returned? Should a debug message be printed?

> +
> +	/* Write the slot ID to retrieve Error Syndrome */
> +	ret = regmap_write(errmon->regmap, DIMM_SYNDROME_SEL, slot);
> +	if (ret)
> +		return ret;
> +
> +	/* Read the Syndrome error */
> +	ret = regmap_read(errmon->regmap, DIMM_SYNDROME_ERR, &data);
> +	if (ret || !data)
> +		return ret;
> +
> +	return sysfs_emit(buf, "%04x\n", data);
> +}
> +
> +#define EVENT_DIMM_SYNDROME(_slot) \
> +	static ssize_t event_dimm##_slot##_syndrome_show(struct device *dev,          \
> +							 struct device_attribute *da, \
> +							 char *buf)                   \
> +	{                                                                             \
> +		return smpro_dimm_syndrome_read(dev, da, buf, _slot);                 \
> +	}                                                                             \
> +	static DEVICE_ATTR_RO(event_dimm##_slot##_syndrome)
> +
> +EVENT_DIMM_SYNDROME(0);
> +EVENT_DIMM_SYNDROME(1);
> +EVENT_DIMM_SYNDROME(2);
> +EVENT_DIMM_SYNDROME(3);
> +EVENT_DIMM_SYNDROME(4);
> +EVENT_DIMM_SYNDROME(5);
> +EVENT_DIMM_SYNDROME(6);
> +EVENT_DIMM_SYNDROME(7);
> +EVENT_DIMM_SYNDROME(8);
> +EVENT_DIMM_SYNDROME(9);
> +EVENT_DIMM_SYNDROME(10);
> +EVENT_DIMM_SYNDROME(11);
> +EVENT_DIMM_SYNDROME(12);
> +EVENT_DIMM_SYNDROME(13);
> +EVENT_DIMM_SYNDROME(14);
> +EVENT_DIMM_SYNDROME(15);
> +
>   static struct attribute *smpro_errmon_attrs[] = {
>   	&dev_attr_overflow_core_ce.attr,
>   	&dev_attr_overflow_core_ue.attr,
> @@ -493,6 +554,22 @@ static struct attribute *smpro_errmon_attrs[] = {
>   	&dev_attr_event_vrd_hot.attr,
>   	&dev_attr_event_dimm_hot.attr,
>   	&dev_attr_event_dimm_2x_refresh.attr,
> +	&dev_attr_event_dimm0_syndrome.attr,
> +	&dev_attr_event_dimm1_syndrome.attr,
> +	&dev_attr_event_dimm2_syndrome.attr,
> +	&dev_attr_event_dimm3_syndrome.attr,
> +	&dev_attr_event_dimm4_syndrome.attr,
> +	&dev_attr_event_dimm5_syndrome.attr,
> +	&dev_attr_event_dimm6_syndrome.attr,
> +	&dev_attr_event_dimm7_syndrome.attr,
> +	&dev_attr_event_dimm8_syndrome.attr,
> +	&dev_attr_event_dimm9_syndrome.attr,
> +	&dev_attr_event_dimm10_syndrome.attr,
> +	&dev_attr_event_dimm11_syndrome.attr,
> +	&dev_attr_event_dimm12_syndrome.attr,
> +	&dev_attr_event_dimm13_syndrome.attr,
> +	&dev_attr_event_dimm14_syndrome.attr,
> +	&dev_attr_event_dimm15_syndrome.attr,
>   	NULL
>   };


Kind regards,

Paul
  
Quan Nguyen Feb. 16, 2023, 3:22 a.m. UTC | #2
On 15/02/2023 14:33, Paul Menzel wrote:
> Dear Quan,
> 
> 
> Thank you for your patch.
> 

Thanks Paul for the review.

> Am 14.02.23 um 07:45 schrieb Quan Nguyen:
>> Adds event_dimm[0-15]_syndrome sysfs to report the failure syndrome
>> to BMC when DIMM training failed.
> 
> Where you able to verify that it works? Out of curiosity, how?
> 

Yes, we verified it by injecting DIMM errors and confirm that errors was 
reported correctly via sysfs.
For about how to do error injection, we may  need to refer to section 
3.2 Memory Error Group in Altra Family RAS Error Injection User Manual. 
It is shared in our Ampere Customer Connect [1]. The latest version is 
v1.00_20220329.

[1] https://connect.amperecomputing.com

>> Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
>> ---
>>   .../sysfs-bus-platform-devices-ampere-smpro   | 10 +++
>>   drivers/misc/smpro-errmon.c                   | 77 +++++++++++++++++++
>>   2 files changed, 87 insertions(+)
>>
>> diff --git 
>> a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro 
>> b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
>> index d4e3f308c451..c35f1d45e656 100644
>> --- a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
>> +++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
>> @@ -265,6 +265,16 @@ Description:
>>           For more details, see section `5.7 GPI Status Registers and 
>> 5.9 Memory Error Register Definitions,
>>           Altra Family Soc BMC Interface Specification`.
>> +What:        
>> /sys/bus/platform/devices/smpro-errmon.*/event_dimm[0-15]_syndrome
>> +KernelVersion:    6.1
> 
> Should it be 6.2, as it probably won’t make it into 6.1?
> 

Thanks for the catch. Will fix in next version.

>> +Contact:    Quan Nguyen <quan@os.amperecomputing.com>
>> +Description:
>> +        (RO) The sysfs returns the 2-byte DIMM failure syndrome data 
>> for slot
>> +        0-15 if it failed to initialized.
> 
> to initialize
> 

Will fix in next version.

>> +
>> +        For more details, see section `5.11 Boot Stage Register 
>> Definitions,
>> +        Altra Family Soc BMC Interface Specification`.
>> +
>>   What:        /sys/bus/platform/devices/smpro-misc.*/boot_progress
>>   KernelVersion:    6.1
>>   Contact:    Quan Nguyen <quan@os.amperecomputing.com>
>> diff --git a/drivers/misc/smpro-errmon.c b/drivers/misc/smpro-errmon.c
>> index 1635e881aefb..3e8570cbb740 100644
>> --- a/drivers/misc/smpro-errmon.c
>> +++ b/drivers/misc/smpro-errmon.c
>> @@ -47,6 +47,12 @@
>>   #define WARN_PMPRO_INFO_LO    0xAC
>>   #define WARN_PMPRO_INFO_HI    0xAD
>> +/* Boot Stage Register */
>> +#define BOOTSTAGE        0xB0
>> +#define DIMM_SYNDROME_SEL    0xB4
>> +#define DIMM_SYNDROME_ERR    0xB5
>> +#define DIMM_SYNDROME_STAGE    4
>> +
>>   /* PCIE Error Registers */
>>   #define PCIE_CE_ERR_CNT        0xC0
>>   #define PCIE_CE_ERR_LEN        0xC1
>> @@ -468,6 +474,61 @@ EVENT_RO(vrd_hot, VRD_HOT_EVENT);
>>   EVENT_RO(dimm_hot, DIMM_HOT_EVENT);
>>   EVENT_RO(dimm_2x_refresh, DIMM_2X_REFRESH_EVENT);
>> +static ssize_t smpro_dimm_syndrome_read(struct device *dev, struct 
>> device_attribute *da,
>> +                    char *buf, int slot)
> 
> Could `slot` be passed as `unsigned int`?
> 

Yes, I will update in next version.
Indeed, the "slot" will be passed as the last param to regmap_write() 
which is in unsigned int as below.

int regmap_write(struct regmap *map, unsigned int reg, unsigned int val);

So, change to "unsigned int" is corrected.

>> +{
>> +    struct smpro_errmon *errmon = dev_get_drvdata(dev);
>> +    s32 data;
>> +    int ret;
>> +
>> +    ret = regmap_read(errmon->regmap, BOOTSTAGE, &data);
> 
> The function signature is:
> 
>      int regmap_read(struct regmap *map, unsigned int reg, unsigned int 
> *val)
> 
> So why not use unsigned int as data type for `data`?
> 

Agree, you comment is correct. Will change data to "unsigned int" in new 
version.

>> +    if (ret)
>> +        return ret;
>> +
>> +    /* check for valid stage */
>> +    data = (data >> 8) & 0xff;
>> +    if (data != DIMM_SYNDROME_STAGE)
>> +        return ret;
> 
> Isn’t now success returned? Should a debug message be printed?
> 

Yes, this is a success case, there is no dimm training error and has 
nothing to return in sysfs.
If no dimm training error, host will never stay in DIMM_SYNDROME_STAGE.

I dont think printing a debug message in this case is good because 
printing out something like "No dimm traning error" is not necessary as 
I think.

>> +
>> +    /* Write the slot ID to retrieve Error Syndrome */
>> +    ret = regmap_write(errmon->regmap, DIMM_SYNDROME_SEL, slot);
>> +    if (ret)
>> +        return ret;
>> +
>> +    /* Read the Syndrome error */
>> +    ret = regmap_read(errmon->regmap, DIMM_SYNDROME_ERR, &data);
>> +    if (ret || !data)
>> +        return ret;
>> +
>> +    return sysfs_emit(buf, "%04x\n", data);
>> +}
>> +
>> +#define EVENT_DIMM_SYNDROME(_slot) \
>> +    static ssize_t event_dimm##_slot##_syndrome_show(struct device 
>> *dev,          \
>> +                             struct device_attribute *da, \
>> +                             char *buf)                   \
>> +    
>> {                                                                             \
>> +        return smpro_dimm_syndrome_read(dev, da, buf, 
>> _slot);                 \
>> +    
>> }                                                                             \
>> +    static DEVICE_ATTR_RO(event_dimm##_slot##_syndrome)
>> +
>> +EVENT_DIMM_SYNDROME(0);
>> +EVENT_DIMM_SYNDROME(1);
>> +EVENT_DIMM_SYNDROME(2);
>> +EVENT_DIMM_SYNDROME(3);
>> +EVENT_DIMM_SYNDROME(4);
>> +EVENT_DIMM_SYNDROME(5);
>> +EVENT_DIMM_SYNDROME(6);
>> +EVENT_DIMM_SYNDROME(7);
>> +EVENT_DIMM_SYNDROME(8);
>> +EVENT_DIMM_SYNDROME(9);
>> +EVENT_DIMM_SYNDROME(10);
>> +EVENT_DIMM_SYNDROME(11);
>> +EVENT_DIMM_SYNDROME(12);
>> +EVENT_DIMM_SYNDROME(13);
>> +EVENT_DIMM_SYNDROME(14);
>> +EVENT_DIMM_SYNDROME(15);
>> +
>>   static struct attribute *smpro_errmon_attrs[] = {
>>       &dev_attr_overflow_core_ce.attr,
>>       &dev_attr_overflow_core_ue.attr,
>> @@ -493,6 +554,22 @@ static struct attribute *smpro_errmon_attrs[] = {
>>       &dev_attr_event_vrd_hot.attr,
>>       &dev_attr_event_dimm_hot.attr,
>>       &dev_attr_event_dimm_2x_refresh.attr,
>> +    &dev_attr_event_dimm0_syndrome.attr,
>> +    &dev_attr_event_dimm1_syndrome.attr,
>> +    &dev_attr_event_dimm2_syndrome.attr,
>> +    &dev_attr_event_dimm3_syndrome.attr,
>> +    &dev_attr_event_dimm4_syndrome.attr,
>> +    &dev_attr_event_dimm5_syndrome.attr,
>> +    &dev_attr_event_dimm6_syndrome.attr,
>> +    &dev_attr_event_dimm7_syndrome.attr,
>> +    &dev_attr_event_dimm8_syndrome.attr,
>> +    &dev_attr_event_dimm9_syndrome.attr,
>> +    &dev_attr_event_dimm10_syndrome.attr,
>> +    &dev_attr_event_dimm11_syndrome.attr,
>> +    &dev_attr_event_dimm12_syndrome.attr,
>> +    &dev_attr_event_dimm13_syndrome.attr,
>> +    &dev_attr_event_dimm14_syndrome.attr,
>> +    &dev_attr_event_dimm15_syndrome.attr,
>>       NULL
>>   };
> 
> 
> Kind regards,
> 
> Paul
  
Greg KH Feb. 16, 2023, 12:30 p.m. UTC | #3
On Thu, Feb 16, 2023 at 10:22:14AM +0700, Quan Nguyen wrote:
> 
> 
> On 15/02/2023 14:33, Paul Menzel wrote:
> > Dear Quan,
> > 
> > 
> > Thank you for your patch.
> > 
> 
> Thanks Paul for the review.
> 
> > Am 14.02.23 um 07:45 schrieb Quan Nguyen:
> > > Adds event_dimm[0-15]_syndrome sysfs to report the failure syndrome
> > > to BMC when DIMM training failed.
> > 
> > Where you able to verify that it works? Out of curiosity, how?
> > 
> 
> Yes, we verified it by injecting DIMM errors and confirm that errors was
> reported correctly via sysfs.
> For about how to do error injection, we may  need to refer to section 3.2
> Memory Error Group in Altra Family RAS Error Injection User Manual. It is
> shared in our Ampere Customer Connect [1]. The latest version is
> v1.00_20220329.
> 
> [1] https://connect.amperecomputing.com
> 
> > > Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
> > > ---
> > >   .../sysfs-bus-platform-devices-ampere-smpro   | 10 +++
> > >   drivers/misc/smpro-errmon.c                   | 77 +++++++++++++++++++
> > >   2 files changed, 87 insertions(+)
> > > 
> > > diff --git
> > > a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
> > > b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
> > > index d4e3f308c451..c35f1d45e656 100644
> > > --- a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
> > > +++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
> > > @@ -265,6 +265,16 @@ Description:
> > >           For more details, see section `5.7 GPI Status Registers
> > > and 5.9 Memory Error Register Definitions,
> > >           Altra Family Soc BMC Interface Specification`.
> > > +What:
> > > /sys/bus/platform/devices/smpro-errmon.*/event_dimm[0-15]_syndrome
> > > +KernelVersion:    6.1
> > 
> > Should it be 6.2, as it probably won’t make it into 6.1?
> > 
> 
> Thanks for the catch. Will fix in next version.

Should be 6.3, it's missed the 6.2 merge window cycle, sorry.

thanks,

greg k-h
  
Quan Nguyen Feb. 17, 2023, 2:36 a.m. UTC | #4
On 16/02/2023 19:30, Greg Kroah-Hartman wrote:
> On Thu, Feb 16, 2023 at 10:22:14AM +0700, Quan Nguyen wrote:
>>
>>
>> On 15/02/2023 14:33, Paul Menzel wrote:
>>> Dear Quan,
>>>
>>>
>>> Thank you for your patch.
>>>
>>
>> Thanks Paul for the review.
>>
>>> Am 14.02.23 um 07:45 schrieb Quan Nguyen:
>>>> Adds event_dimm[0-15]_syndrome sysfs to report the failure syndrome
>>>> to BMC when DIMM training failed.
>>>
>>> Where you able to verify that it works? Out of curiosity, how?
>>>
>>
>> Yes, we verified it by injecting DIMM errors and confirm that errors was
>> reported correctly via sysfs.
>> For about how to do error injection, we may  need to refer to section 3.2
>> Memory Error Group in Altra Family RAS Error Injection User Manual. It is
>> shared in our Ampere Customer Connect [1]. The latest version is
>> v1.00_20220329.
>>
>> [1] https://connect.amperecomputing.com
>>
>>>> Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
>>>> ---
>>>>    .../sysfs-bus-platform-devices-ampere-smpro   | 10 +++
>>>>    drivers/misc/smpro-errmon.c                   | 77 +++++++++++++++++++
>>>>    2 files changed, 87 insertions(+)
>>>>
>>>> diff --git
>>>> a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
>>>> b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
>>>> index d4e3f308c451..c35f1d45e656 100644
>>>> --- a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
>>>> +++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
>>>> @@ -265,6 +265,16 @@ Description:
>>>>            For more details, see section `5.7 GPI Status Registers
>>>> and 5.9 Memory Error Register Definitions,
>>>>            Altra Family Soc BMC Interface Specification`.
>>>> +What:
>>>> /sys/bus/platform/devices/smpro-errmon.*/event_dimm[0-15]_syndrome
>>>> +KernelVersion:    6.1
>>>
>>> Should it be 6.2, as it probably won’t make it into 6.1?
>>>
>>
>> Thanks for the catch. Will fix in next version.
> 
> Should be 6.3, it's missed the 6.2 merge window cycle, sorry.
> 
> thanks,
> 

Thanks Greg,
Will update to 6.3
  

Patch

diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
index d4e3f308c451..c35f1d45e656 100644
--- a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
+++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
@@ -265,6 +265,16 @@  Description:
 		For more details, see section `5.7 GPI Status Registers and 5.9 Memory Error Register Definitions,
 		Altra Family Soc BMC Interface Specification`.
 
+What:		/sys/bus/platform/devices/smpro-errmon.*/event_dimm[0-15]_syndrome
+KernelVersion:	6.1
+Contact:	Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+		(RO) The sysfs returns the 2-byte DIMM failure syndrome data for slot
+		0-15 if it failed to initialized.
+
+		For more details, see section `5.11 Boot Stage Register Definitions,
+		Altra Family Soc BMC Interface Specification`.
+
 What:		/sys/bus/platform/devices/smpro-misc.*/boot_progress
 KernelVersion:	6.1
 Contact:	Quan Nguyen <quan@os.amperecomputing.com>
diff --git a/drivers/misc/smpro-errmon.c b/drivers/misc/smpro-errmon.c
index 1635e881aefb..3e8570cbb740 100644
--- a/drivers/misc/smpro-errmon.c
+++ b/drivers/misc/smpro-errmon.c
@@ -47,6 +47,12 @@ 
 #define WARN_PMPRO_INFO_LO	0xAC
 #define WARN_PMPRO_INFO_HI	0xAD
 
+/* Boot Stage Register */
+#define BOOTSTAGE		0xB0
+#define DIMM_SYNDROME_SEL	0xB4
+#define DIMM_SYNDROME_ERR	0xB5
+#define DIMM_SYNDROME_STAGE	4
+
 /* PCIE Error Registers */
 #define PCIE_CE_ERR_CNT		0xC0
 #define PCIE_CE_ERR_LEN		0xC1
@@ -468,6 +474,61 @@  EVENT_RO(vrd_hot, VRD_HOT_EVENT);
 EVENT_RO(dimm_hot, DIMM_HOT_EVENT);
 EVENT_RO(dimm_2x_refresh, DIMM_2X_REFRESH_EVENT);
 
+static ssize_t smpro_dimm_syndrome_read(struct device *dev, struct device_attribute *da,
+					char *buf, int slot)
+{
+	struct smpro_errmon *errmon = dev_get_drvdata(dev);
+	s32 data;
+	int ret;
+
+	ret = regmap_read(errmon->regmap, BOOTSTAGE, &data);
+	if (ret)
+		return ret;
+
+	/* check for valid stage */
+	data = (data >> 8) & 0xff;
+	if (data != DIMM_SYNDROME_STAGE)
+		return ret;
+
+	/* Write the slot ID to retrieve Error Syndrome */
+	ret = regmap_write(errmon->regmap, DIMM_SYNDROME_SEL, slot);
+	if (ret)
+		return ret;
+
+	/* Read the Syndrome error */
+	ret = regmap_read(errmon->regmap, DIMM_SYNDROME_ERR, &data);
+	if (ret || !data)
+		return ret;
+
+	return sysfs_emit(buf, "%04x\n", data);
+}
+
+#define EVENT_DIMM_SYNDROME(_slot) \
+	static ssize_t event_dimm##_slot##_syndrome_show(struct device *dev,          \
+							 struct device_attribute *da, \
+							 char *buf)                   \
+	{                                                                             \
+		return smpro_dimm_syndrome_read(dev, da, buf, _slot);                 \
+	}                                                                             \
+	static DEVICE_ATTR_RO(event_dimm##_slot##_syndrome)
+
+EVENT_DIMM_SYNDROME(0);
+EVENT_DIMM_SYNDROME(1);
+EVENT_DIMM_SYNDROME(2);
+EVENT_DIMM_SYNDROME(3);
+EVENT_DIMM_SYNDROME(4);
+EVENT_DIMM_SYNDROME(5);
+EVENT_DIMM_SYNDROME(6);
+EVENT_DIMM_SYNDROME(7);
+EVENT_DIMM_SYNDROME(8);
+EVENT_DIMM_SYNDROME(9);
+EVENT_DIMM_SYNDROME(10);
+EVENT_DIMM_SYNDROME(11);
+EVENT_DIMM_SYNDROME(12);
+EVENT_DIMM_SYNDROME(13);
+EVENT_DIMM_SYNDROME(14);
+EVENT_DIMM_SYNDROME(15);
+
 static struct attribute *smpro_errmon_attrs[] = {
 	&dev_attr_overflow_core_ce.attr,
 	&dev_attr_overflow_core_ue.attr,
@@ -493,6 +554,22 @@  static struct attribute *smpro_errmon_attrs[] = {
 	&dev_attr_event_vrd_hot.attr,
 	&dev_attr_event_dimm_hot.attr,
 	&dev_attr_event_dimm_2x_refresh.attr,
+	&dev_attr_event_dimm0_syndrome.attr,
+	&dev_attr_event_dimm1_syndrome.attr,
+	&dev_attr_event_dimm2_syndrome.attr,
+	&dev_attr_event_dimm3_syndrome.attr,
+	&dev_attr_event_dimm4_syndrome.attr,
+	&dev_attr_event_dimm5_syndrome.attr,
+	&dev_attr_event_dimm6_syndrome.attr,
+	&dev_attr_event_dimm7_syndrome.attr,
+	&dev_attr_event_dimm8_syndrome.attr,
+	&dev_attr_event_dimm9_syndrome.attr,
+	&dev_attr_event_dimm10_syndrome.attr,
+	&dev_attr_event_dimm11_syndrome.attr,
+	&dev_attr_event_dimm12_syndrome.attr,
+	&dev_attr_event_dimm13_syndrome.attr,
+	&dev_attr_event_dimm14_syndrome.attr,
+	&dev_attr_event_dimm15_syndrome.attr,
 	NULL
 };