[08/11] cxl/mem: Wire up event interrupts

Message ID 20221110185758.879472-9-ira.weiny@intel.com
State New
Headers
Series CXL: Process event logs |

Commit Message

Ira Weiny Nov. 10, 2022, 6:57 p.m. UTC
  From: Ira Weiny <ira.weiny@intel.com>

CXL device events are signaled via interrupts.  Each event log may have
a different interrupt message number.  These message numbers are
reported in the Get Event Interrupt Policy mailbox command.

Add interrupt support for event logs.  Interrupts are allocated as
shared interrupts.  Therefore, all or some event logs can share the same
message number.

The driver must deal with the possibility that dynamic capacity is not
yet supported by a device it sees.  Fallback and retry without dynamic
capacity if the first attempt fails.

Device capacity event logs interrupt as part of the informational event
log.  Check the event status to see which log has data.

Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes from RFC v2
	Adjust to new irq 16 vector allocation
	Jonathan
		Remove CXL_INT_RES
	Use irq threads to ensure mailbox commands are executed outside irq context
	Adjust for optional Dynamic Capacity log
---
 drivers/cxl/core/mbox.c      |  53 +++++++++++++-
 drivers/cxl/cxlmem.h         |  31 ++++++++
 drivers/cxl/pci.c            | 133 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/cxl_mem.h |   2 +
 4 files changed, 217 insertions(+), 2 deletions(-)
  

Comments

Dave Jiang Nov. 15, 2022, 11:13 p.m. UTC | #1
On 11/10/2022 10:57 AM, ira.weiny@intel.com wrote:
> From: Ira Weiny <ira.weiny@intel.com>
> 
> CXL device events are signaled via interrupts.  Each event log may have
> a different interrupt message number.  These message numbers are
> reported in the Get Event Interrupt Policy mailbox command.
> 
> Add interrupt support for event logs.  Interrupts are allocated as
> shared interrupts.  Therefore, all or some event logs can share the same
> message number.
> 
> The driver must deal with the possibility that dynamic capacity is not
> yet supported by a device it sees.  Fallback and retry without dynamic
> capacity if the first attempt fails.
> 
> Device capacity event logs interrupt as part of the informational event
> log.  Check the event status to see which log has data.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> ---
> Changes from RFC v2
> 	Adjust to new irq 16 vector allocation
> 	Jonathan
> 		Remove CXL_INT_RES
> 	Use irq threads to ensure mailbox commands are executed outside irq context
> 	Adjust for optional Dynamic Capacity log
> ---
>   drivers/cxl/core/mbox.c      |  53 +++++++++++++-
>   drivers/cxl/cxlmem.h         |  31 ++++++++
>   drivers/cxl/pci.c            | 133 +++++++++++++++++++++++++++++++++++
>   include/uapi/linux/cxl_mem.h |   2 +
>   4 files changed, 217 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 879b228a98a0..1e6762af2a00 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -53,6 +53,8 @@ static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
>   	CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
>   	CXL_CMD(GET_EVENT_RECORD, 1, CXL_VARIABLE_PAYLOAD, 0),
>   	CXL_CMD(CLEAR_EVENT_RECORD, CXL_VARIABLE_PAYLOAD, 0, 0),
> +	CXL_CMD(GET_EVT_INT_POLICY, 0, 0x5, 0),
> +	CXL_CMD(SET_EVT_INT_POLICY, 0x5, 0, 0),
>   	CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
>   	CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
>   	CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0),
> @@ -791,8 +793,8 @@ static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
>   				 &payload, sizeof(payload), NULL, 0);
>   }
>   
> -static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> -				    enum cxl_event_log_type type)
> +void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> +			     enum cxl_event_log_type type)
>   {
>   	struct cxl_get_event_payload payload;
>   	u16 pl_nr;
> @@ -837,6 +839,7 @@ static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
>   	} while (pl_nr > CXL_GET_EVENT_NR_RECORDS ||
>   		 payload.flags & CXL_GET_EVENT_FLAG_MORE_RECORDS);
>   }
> +EXPORT_SYMBOL_NS_GPL(cxl_mem_get_records_log, CXL);
>   
>   /**
>    * cxl_mem_get_event_records - Get Event Records from the device
> @@ -867,6 +870,52 @@ void cxl_mem_get_event_records(struct cxl_dev_state *cxlds)
>   }
>   EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
>   
> +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds)
> +{
> +	struct cxl_event_interrupt_policy *policy = &cxlds->evt_int_policy;
> +	size_t policy_size = sizeof(*policy);
> +	bool retry = true;
> +	int rc;
> +
> +	policy->info_settings = CXL_INT_MSI_MSIX;
> +	policy->warn_settings = CXL_INT_MSI_MSIX;
> +	policy->failure_settings = CXL_INT_MSI_MSIX;
> +	policy->fatal_settings = CXL_INT_MSI_MSIX;
> +	policy->dyn_cap_settings = CXL_INT_MSI_MSIX;
> +
> +again:
> +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_EVT_INT_POLICY,
> +			       policy, policy_size, NULL, 0);
> +	if (rc < 0) {
> +		/*
> +		 * If the device does not support dynamic capacity it may fail
> +		 * the command due to an invalid payload.  Retry without
> +		 * dynamic capacity.
> +		 */
> +		if (retry) {
> +			retry = false;
> +			policy->dyn_cap_settings = 0;
> +			policy_size = sizeof(*policy) - sizeof(policy->dyn_cap_settings);
> +			goto again;
> +		}
> +		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
> +			rc);
> +		memset(policy, CXL_INT_NONE, sizeof(*policy));
> +		return rc;
> +	}

Up to you, but I think you can avoid the goto:

	int retry = 2;
	do {
		rc = cxl_mbox_send_cmd(...);
		if (rc == 0 || retry == 1)
			break;
		policy->dyn_cap_settings = 0;
		policy_size = sizeof(*policy) - sizeof(policy->dyn_cap_settings);
		retry--;
	} while (retry);

	if (rc < 0) {
		dev_err(...);
		memset(policy, ...);
		return rc;
	}

> +
> +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_EVT_INT_POLICY, NULL, 0,
> +			       policy, policy_size);
> +	if (rc < 0) {
> +		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
> +			rc);
> +		return rc;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_event_config_msgnums, CXL);
> +
>   /**
>    * cxl_mem_get_partition_info - Get partition info
>    * @cxlds: The device data for the operation
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 03da4f8f74d3..4d9c3ea30c24 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -179,6 +179,31 @@ struct cxl_endpoint_dvsec_info {
>   	struct range dvsec_range[2];
>   };
>   
> +/**
> + * Event Interrupt Policy
> + *
> + * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
> + */
> +enum cxl_event_int_mode {
> +	CXL_INT_NONE		= 0x00,
> +	CXL_INT_MSI_MSIX	= 0x01,
> +	CXL_INT_FW		= 0x02
> +};
> +#define CXL_EVENT_INT_MODE_MASK 0x3
> +#define CXL_EVENT_INT_MSGNUM(setting) (((setting) & 0xf0) >> 4)
> +struct cxl_event_interrupt_policy {
> +	u8 info_settings;
> +	u8 warn_settings;
> +	u8 failure_settings;
> +	u8 fatal_settings;
> +	u8 dyn_cap_settings;
> +} __packed;
> +
> +static inline bool cxl_evt_int_is_msi(u8 setting)
> +{
> +	return CXL_INT_MSI_MSIX == (setting & CXL_EVENT_INT_MODE_MASK);
> +}
> +
>   /**
>    * struct cxl_dev_state - The driver device state
>    *
> @@ -246,6 +271,7 @@ struct cxl_dev_state {
>   
>   	resource_size_t component_reg_phys;
>   	u64 serial;
> +	struct cxl_event_interrupt_policy evt_int_policy;
>   
>   	struct xarray doe_mbs;
>   
> @@ -259,6 +285,8 @@ enum cxl_opcode {
>   	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
>   	CXL_MBOX_OP_GET_EVENT_RECORD	= 0x0100,
>   	CXL_MBOX_OP_CLEAR_EVENT_RECORD	= 0x0101,
> +	CXL_MBOX_OP_GET_EVT_INT_POLICY	= 0x0102,
> +	CXL_MBOX_OP_SET_EVT_INT_POLICY	= 0x0103,
>   	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
>   	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
>   	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
> @@ -539,7 +567,10 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
>   struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
>   void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
>   void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
> +void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
> +			     enum cxl_event_log_type type);
>   void cxl_mem_get_event_records(struct cxl_dev_state *cxlds);
> +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds);
>   #ifdef CONFIG_CXL_SUSPEND
>   void cxl_mem_active_inc(void);
>   void cxl_mem_active_dec(void);
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index e0d511575b45..64b2e2671043 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -458,6 +458,138 @@ static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds)
>   	cxlds->nr_irq_vecs = nvecs;
>   }
>   
> +struct cxl_event_irq_id {
> +	struct cxl_dev_state *cxlds;
> +	u32 status;
> +	unsigned int msgnum;
> +};
> +
> +static irqreturn_t cxl_event_int_thread(int irq, void *id)
> +{
> +	struct cxl_event_irq_id *cxlid = id;
> +	struct cxl_dev_state *cxlds = cxlid->cxlds;
> +
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_INFO)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_WARN)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_FAIL)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_FATAL)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_DYNAMIC_CAP)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t cxl_event_int_handler(int irq, void *id)
> +{
> +	struct cxl_event_irq_id *cxlid = id;
> +	struct cxl_dev_state *cxlds = cxlid->cxlds;
> +	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
> +
> +	if (cxlid->status & status)
> +		return IRQ_WAKE_THREAD;
> +	return IRQ_HANDLED;

IRQ_NONE since your handler did not handle anything and this is a shared 
interrupt?

> +}
> +
> +static void cxl_free_event_irq(void *id)
> +{
> +	struct cxl_event_irq_id *cxlid = id;
> +	struct pci_dev *pdev = to_pci_dev(cxlid->cxlds->dev);
> +
> +	pci_free_irq(pdev, cxlid->msgnum, id);
> +}
> +
> +static u32 log_type_to_status(enum cxl_event_log_type log_type)
> +{
> +	switch (log_type) {
> +	case CXL_EVENT_TYPE_INFO:
> +		return CXLDEV_EVENT_STATUS_INFO | CXLDEV_EVENT_STATUS_DYNAMIC_CAP;
> +	case CXL_EVENT_TYPE_WARN:
> +		return CXLDEV_EVENT_STATUS_WARN;
> +	case CXL_EVENT_TYPE_FAIL:
> +		return CXLDEV_EVENT_STATUS_FAIL;
> +	case CXL_EVENT_TYPE_FATAL:
> +		return CXLDEV_EVENT_STATUS_FATAL;
> +	default:
> +		break;
> +	}
> +	return 0;
> +}
> +
> +static int cxl_request_event_irq(struct cxl_dev_state *cxlds,
> +				 enum cxl_event_log_type log_type,
> +				 u8 setting)
> +{
> +	struct device *dev = cxlds->dev;
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct cxl_event_irq_id *id;
> +	unsigned int msgnum = CXL_EVENT_INT_MSGNUM(setting);
> +	int irq;

int rc? pci_request_irq() returns an errno or 0, not the number of irq. 
The variable naming is a bit confusing.

DJ

> +
> +	/* Disabled irq is not an error */
> +	if (!cxl_evt_int_is_msi(setting) || msgnum > cxlds->nr_irq_vecs) {
> +		dev_dbg(dev, "Event interrupt not enabled; %s %u %d\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_INFO),
> +			msgnum, cxlds->nr_irq_vecs);
> +		return 0;
> +	}
> +
> +	id = devm_kzalloc(dev, sizeof(*id), GFP_KERNEL);
> +	if (!id)
> +		return -ENOMEM;
> +
> +	id->cxlds = cxlds;
> +	id->msgnum = msgnum;
> +	id->status = log_type_to_status(log_type);
> +
> +	irq = pci_request_irq(pdev, id->msgnum, cxl_event_int_handler,
> +			      cxl_event_int_thread, id,
> +			      "%s:event-log-%s", dev_name(dev),
> +			      cxl_event_log_type_str(log_type));
> +	if (irq)
> +		return irq;
> +
> +	devm_add_action_or_reset(dev, cxl_free_event_irq, id);
> +	return 0;
> +}
> +
> +static void cxl_event_irqsetup(struct cxl_dev_state *cxlds)
> +{
> +	struct device *dev = cxlds->dev;
> +	u8 setting;
> +
> +	if (cxl_event_config_msgnums(cxlds))
> +		return;
> +
> +	/*
> +	 * Dynamic Capacity shares the info message number
> +	 * Nothing to be done except check the status bit in the
> +	 * irq thread.
> +	 */
> +	setting = cxlds->evt_int_policy.info_settings;
> +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_INFO, setting))
> +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_INFO));
> +
> +	setting = cxlds->evt_int_policy.warn_settings;
> +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_WARN, setting))
> +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_WARN));
> +
> +	setting = cxlds->evt_int_policy.failure_settings;
> +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_FAIL, setting))
> +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_FAIL));
> +
> +	setting = cxlds->evt_int_policy.fatal_settings;
> +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_FATAL, setting))
> +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_FATAL));
> +}
> +
>   static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>   {
>   	struct cxl_register_map map;
> @@ -525,6 +657,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>   		return rc;
>   
>   	cxl_pci_alloc_irq_vectors(cxlds);
> +	cxl_event_irqsetup(cxlds);
>   
>   	cxlmd = devm_cxl_add_memdev(cxlds);
>   	if (IS_ERR(cxlmd))
> diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h
> index 7c1ad8062792..a8204802fcca 100644
> --- a/include/uapi/linux/cxl_mem.h
> +++ b/include/uapi/linux/cxl_mem.h
> @@ -26,6 +26,8 @@
>   	___C(GET_SUPPORTED_LOGS, "Get Supported Logs"),                   \
>   	___C(GET_EVENT_RECORD, "Get Event Record"),                       \
>   	___C(CLEAR_EVENT_RECORD, "Clear Event Record"),                   \
> +	___C(GET_EVT_INT_POLICY, "Get Event Interrupt Policy"),           \
> +	___C(SET_EVT_INT_POLICY, "Set Event Interrupt Policy"),           \
>   	___C(GET_FW_INFO, "Get FW Info"),                                 \
>   	___C(GET_PARTITION_INFO, "Get Partition Information"),            \
>   	___C(GET_LSA, "Get Label Storage Area"),                          \
  
Jonathan Cameron Nov. 16, 2022, 2:40 p.m. UTC | #2
On Thu, 10 Nov 2022 10:57:55 -0800
ira.weiny@intel.com wrote:

> From: Ira Weiny <ira.weiny@intel.com>
> 
> CXL device events are signaled via interrupts.  Each event log may have
> a different interrupt message number.  These message numbers are
> reported in the Get Event Interrupt Policy mailbox command.
> 
> Add interrupt support for event logs.  Interrupts are allocated as
> shared interrupts.  Therefore, all or some event logs can share the same
> message number.
> 
> The driver must deal with the possibility that dynamic capacity is not
> yet supported by a device it sees.  Fallback and retry without dynamic
> capacity if the first attempt fails.
> 
> Device capacity event logs interrupt as part of the informational event
> log.  Check the event status to see which log has data.
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
Hi Ira,

A few comments inline.

Thanks,

Jonathan

> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 879b228a98a0..1e6762af2a00 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c

>  /**
>   * cxl_mem_get_event_records - Get Event Records from the device
> @@ -867,6 +870,52 @@ void cxl_mem_get_event_records(struct cxl_dev_state *cxlds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
>  
> +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds)
> +{
> +	struct cxl_event_interrupt_policy *policy = &cxlds->evt_int_policy;
> +	size_t policy_size = sizeof(*policy);
> +	bool retry = true;
> +	int rc;
> +
> +	policy->info_settings = CXL_INT_MSI_MSIX;
> +	policy->warn_settings = CXL_INT_MSI_MSIX;
> +	policy->failure_settings = CXL_INT_MSI_MSIX;
> +	policy->fatal_settings = CXL_INT_MSI_MSIX;
> +	policy->dyn_cap_settings = CXL_INT_MSI_MSIX;
> +
> +again:
> +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_EVT_INT_POLICY,
> +			       policy, policy_size, NULL, 0);
> +	if (rc < 0) {
> +		/*
> +		 * If the device does not support dynamic capacity it may fail
> +		 * the command due to an invalid payload.  Retry without
> +		 * dynamic capacity.
> +		 */

There are a number of ways to discover if DCD is supported that aren't based
on try and retry like this. 9.13.3 has "basic sequence to utilize Dynamic Capacity"
That calls out:
Verify the necessary Dynamic Capacity commands are returned in the CEL.

First I'm not sure we should set the interrupt on for DCD until we have a lot
more of the flow handled, secondly even then we should figure out if it is supported
at a higher level than this command and pass that info down here.


> +		if (retry) {
> +			retry = false;
> +			policy->dyn_cap_settings = 0;
> +			policy_size = sizeof(*policy) - sizeof(policy->dyn_cap_settings);
> +			goto again;
> +		}
> +		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
> +			rc);
> +		memset(policy, CXL_INT_NONE, sizeof(*policy));

Relying on all the fields being 1 byte is a bit error prone. I'd just set them all
individually in the interests of more readable code.

> +		return rc;
> +	}
> +
> +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_EVT_INT_POLICY, NULL, 0,
> +			       policy, policy_size);

Add a comment on why you are reading this back (to get the msgnums in the upper
bits) as it's not obvious to a casual reader.

> +	if (rc < 0) {
> +		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
> +			rc);
> +		return rc;
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_event_config_msgnums, CXL);
> +

...

> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index e0d511575b45..64b2e2671043 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -458,6 +458,138 @@ static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds)
>  	cxlds->nr_irq_vecs = nvecs;
>  }
>  
> +struct cxl_event_irq_id {
> +	struct cxl_dev_state *cxlds;
> +	u32 status;
> +	unsigned int msgnum;
msgnum is only here for freeing the interrupt - I'd rather we fixed
that by using standard infrastructure (or adding some - see below).

status is an indirect way of allowing us to share an interrupt handler.
You could do that by registering a trivial wrapper for each instead.
Then all you have left is the cxl_dev_state which could be passed
in directly as the callback parameter removing need to have this
structure at all.  I think that might be neater.

> +};
> +
> +static irqreturn_t cxl_event_int_thread(int irq, void *id)
> +{
> +	struct cxl_event_irq_id *cxlid = id;
> +	struct cxl_dev_state *cxlds = cxlid->cxlds;
> +
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_INFO)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_WARN)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_FAIL)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_FATAL)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
> +	if (cxlid->status & CXLDEV_EVENT_STATUS_DYNAMIC_CAP)
> +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP);
> +
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t cxl_event_int_handler(int irq, void *id)
> +{
> +	struct cxl_event_irq_id *cxlid = id;
> +	struct cxl_dev_state *cxlds = cxlid->cxlds;
> +	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
> +
> +	if (cxlid->status & status)
> +		return IRQ_WAKE_THREAD;
> +	return IRQ_HANDLED;

If status not set IRQ_NONE.
Ah. I see Dave raised this as well.

> +}

...

> +static int cxl_request_event_irq(struct cxl_dev_state *cxlds,
> +				 enum cxl_event_log_type log_type,
> +				 u8 setting)
> +{
> +	struct device *dev = cxlds->dev;
> +	struct pci_dev *pdev = to_pci_dev(dev);
> +	struct cxl_event_irq_id *id;
> +	unsigned int msgnum = CXL_EVENT_INT_MSGNUM(setting);
> +	int irq;
> +
> +	/* Disabled irq is not an error */
> +	if (!cxl_evt_int_is_msi(setting) || msgnum > cxlds->nr_irq_vecs) {

I don't think that second condition can occur.  The language under table 8-52
(I think) means that it will move around if there aren't enough vectors
(for MSI - MSI-X is more complex, but result the same).

> +		dev_dbg(dev, "Event interrupt not enabled; %s %u %d\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_INFO),
> +			msgnum, cxlds->nr_irq_vecs);
> +		return 0;
> +	}
> +
> +	id = devm_kzalloc(dev, sizeof(*id), GFP_KERNEL);
> +	if (!id)
> +		return -ENOMEM;
> +
> +	id->cxlds = cxlds;
> +	id->msgnum = msgnum;
> +	id->status = log_type_to_status(log_type);
> +
> +	irq = pci_request_irq(pdev, id->msgnum, cxl_event_int_handler,
> +			      cxl_event_int_thread, id,
> +			      "%s:event-log-%s", dev_name(dev),
> +			      cxl_event_log_type_str(log_type));
> +	if (irq)
> +		return irq;
> +
> +	devm_add_action_or_reset(dev, cxl_free_event_irq, id);

Hmm. no pcim_request_irq()  maybe this is the time to propose one
(separate from this patch so we don't get delayed by that!)

We discussed this way back in DOE series (I'd forgotten but lore found
it for me).  There I suggested just calling
devm_request_threaded_irq() directly as a work around.

> +	return 0;
> +}
> +
> +static void cxl_event_irqsetup(struct cxl_dev_state *cxlds)
> +{
> +	struct device *dev = cxlds->dev;
> +	u8 setting;
> +
> +	if (cxl_event_config_msgnums(cxlds))
> +		return;
> +
> +	/*
> +	 * Dynamic Capacity shares the info message number
> +	 * Nothing to be done except check the status bit in the
> +	 * irq thread.
> +	 */
> +	setting = cxlds->evt_int_policy.info_settings;
> +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_INFO, setting))
> +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_INFO));
> +
> +	setting = cxlds->evt_int_policy.warn_settings;
> +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_WARN, setting))
> +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_WARN));
> +
> +	setting = cxlds->evt_int_policy.failure_settings;
> +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_FAIL, setting))
> +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_FAIL));
> +
> +	setting = cxlds->evt_int_policy.fatal_settings;
> +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_FATAL, setting))
> +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> +			cxl_event_log_type_str(CXL_EVENT_TYPE_FATAL));
> +}
  
Ira Weiny Nov. 17, 2022, 1:38 a.m. UTC | #3
On Tue, Nov 15, 2022 at 04:13:24PM -0700, Jiang, Dave wrote:
> 
> 
> On 11/10/2022 10:57 AM, ira.weiny@intel.com wrote:

[snip]

> > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds)
> > +{
> > +	struct cxl_event_interrupt_policy *policy = &cxlds->evt_int_policy;
> > +	size_t policy_size = sizeof(*policy);
> > +	bool retry = true;
> > +	int rc;
> > +
> > +	policy->info_settings = CXL_INT_MSI_MSIX;
> > +	policy->warn_settings = CXL_INT_MSI_MSIX;
> > +	policy->failure_settings = CXL_INT_MSI_MSIX;
> > +	policy->fatal_settings = CXL_INT_MSI_MSIX;
> > +	policy->dyn_cap_settings = CXL_INT_MSI_MSIX;
> > +
> > +again:
> > +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_EVT_INT_POLICY,
> > +			       policy, policy_size, NULL, 0);
> > +	if (rc < 0) {
> > +		/*
> > +		 * If the device does not support dynamic capacity it may fail
> > +		 * the command due to an invalid payload.  Retry without
> > +		 * dynamic capacity.
> > +		 */
> > +		if (retry) {
> > +			retry = false;
> > +			policy->dyn_cap_settings = 0;
> > +			policy_size = sizeof(*policy) - sizeof(policy->dyn_cap_settings);
> > +			goto again;
> > +		}
> > +		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
> > +			rc);
> > +		memset(policy, CXL_INT_NONE, sizeof(*policy));
> > +		return rc;
> > +	}
> 
> Up to you, but I think you can avoid the goto:

I think this is a bit more confusing because we are not really retrying 2
times.

> 
> 	int retry = 2;
> 	do {
> 		rc = cxl_mbox_send_cmd(...);
> 		if (rc == 0 || retry == 1)

Specifically this looks confusing to me.  Why break on retry == 1?

> 			break;
> 		policy->dyn_cap_settings = 0;
> 		policy_size = sizeof(*policy) - sizeof(policy->dyn_cap_settings);
> 		retry--;
> 	} while (retry);
> 
> 	if (rc < 0) {
> 		dev_err(...);
> 		memset(policy, ...);
> 		return rc;
> 	}

That said perhaps the retry should be based on policy_size...  :-/  I'm not
sure that adds much.  I'm going to leave it as is.

[snip]

> > +
> > +static irqreturn_t cxl_event_int_handler(int irq, void *id)
> > +{
> > +	struct cxl_event_irq_id *cxlid = id;
> > +	struct cxl_dev_state *cxlds = cxlid->cxlds;
> > +	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
> > +
> > +	if (cxlid->status & status)
> > +		return IRQ_WAKE_THREAD;
> > +	return IRQ_HANDLED;
> 
> IRQ_NONE since your handler did not handle anything and this is a shared
> interrupt?

Yes.  Good catch thanks!

> 
> > +}
> > +
> > +static void cxl_free_event_irq(void *id)
> > +{
> > +	struct cxl_event_irq_id *cxlid = id;
> > +	struct pci_dev *pdev = to_pci_dev(cxlid->cxlds->dev);
> > +
> > +	pci_free_irq(pdev, cxlid->msgnum, id);
> > +}
> > +
> > +static u32 log_type_to_status(enum cxl_event_log_type log_type)
> > +{
> > +	switch (log_type) {
> > +	case CXL_EVENT_TYPE_INFO:
> > +		return CXLDEV_EVENT_STATUS_INFO | CXLDEV_EVENT_STATUS_DYNAMIC_CAP;
> > +	case CXL_EVENT_TYPE_WARN:
> > +		return CXLDEV_EVENT_STATUS_WARN;
> > +	case CXL_EVENT_TYPE_FAIL:
> > +		return CXLDEV_EVENT_STATUS_FAIL;
> > +	case CXL_EVENT_TYPE_FATAL:
> > +		return CXLDEV_EVENT_STATUS_FATAL;
> > +	default:
> > +		break;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int cxl_request_event_irq(struct cxl_dev_state *cxlds,
> > +				 enum cxl_event_log_type log_type,
> > +				 u8 setting)
> > +{
> > +	struct device *dev = cxlds->dev;
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +	struct cxl_event_irq_id *id;
> > +	unsigned int msgnum = CXL_EVENT_INT_MSGNUM(setting);
> > +	int irq;
> 
> int rc? pci_request_irq() returns an errno or 0, not the number of irq. The
> variable naming is a bit confusing.

Indeed.  Changed, and thanks,
Ira

> 
> DJ
>
  
Ira Weiny Nov. 30, 2022, 9:11 a.m. UTC | #4
On Wed, Nov 16, 2022 at 02:40:21PM +0000, Jonathan Cameron wrote:
> On Thu, 10 Nov 2022 10:57:55 -0800
> ira.weiny@intel.com wrote:
> 
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > CXL device events are signaled via interrupts.  Each event log may have
> > a different interrupt message number.  These message numbers are
> > reported in the Get Event Interrupt Policy mailbox command.
> > 
> > Add interrupt support for event logs.  Interrupts are allocated as
> > shared interrupts.  Therefore, all or some event logs can share the same
> > message number.
> > 
> > The driver must deal with the possibility that dynamic capacity is not
> > yet supported by a device it sees.  Fallback and retry without dynamic
> > capacity if the first attempt fails.
> > 
> > Device capacity event logs interrupt as part of the informational event
> > log.  Check the event status to see which log has data.
> > 
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> > 
> Hi Ira,
> 
> A few comments inline.

Thanks for the review!

> 
> Thanks,
> 
> Jonathan
> 
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index 879b228a98a0..1e6762af2a00 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> 
> >  /**
> >   * cxl_mem_get_event_records - Get Event Records from the device
> > @@ -867,6 +870,52 @@ void cxl_mem_get_event_records(struct cxl_dev_state *cxlds)
> >  }
> >  EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
> >  
> > +int cxl_event_config_msgnums(struct cxl_dev_state *cxlds)
> > +{
> > +	struct cxl_event_interrupt_policy *policy = &cxlds->evt_int_policy;
> > +	size_t policy_size = sizeof(*policy);
> > +	bool retry = true;
> > +	int rc;
> > +
> > +	policy->info_settings = CXL_INT_MSI_MSIX;
> > +	policy->warn_settings = CXL_INT_MSI_MSIX;
> > +	policy->failure_settings = CXL_INT_MSI_MSIX;
> > +	policy->fatal_settings = CXL_INT_MSI_MSIX;
> > +	policy->dyn_cap_settings = CXL_INT_MSI_MSIX;
> > +
> > +again:
> > +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_EVT_INT_POLICY,
> > +			       policy, policy_size, NULL, 0);
> > +	if (rc < 0) {
> > +		/*
> > +		 * If the device does not support dynamic capacity it may fail
> > +		 * the command due to an invalid payload.  Retry without
> > +		 * dynamic capacity.
> > +		 */
> 
> There are a number of ways to discover if DCD is supported that aren't based
> on try and retry like this. 9.13.3 has "basic sequence to utilize Dynamic Capacity"
> That calls out:
> Verify the necessary Dynamic Capacity commands are returned in the CEL.
> 
> First I'm not sure we should set the interrupt on for DCD until we have a lot
> more of the flow handled, secondly even then we should figure out if it is supported
> at a higher level than this command and pass that info down here.

I'm not sure I really agree.  The events are just traced.  I think this
functionality is really orthogonal to if any other support for DCD is there.

Regardless like I said in the call I think deferring this is the right way to
go for now.

> 
> 
> > +		if (retry) {
> > +			retry = false;
> > +			policy->dyn_cap_settings = 0;
> > +			policy_size = sizeof(*policy) - sizeof(policy->dyn_cap_settings);
> > +			goto again;
> > +		}
> > +		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
> > +			rc);
> > +		memset(policy, CXL_INT_NONE, sizeof(*policy));
> 
> Relying on all the fields being 1 byte is a bit error prone. I'd just set them all
> individually in the interests of more readable code.

Done.

> 
> > +		return rc;
> > +	}
> > +
> > +	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_EVT_INT_POLICY, NULL, 0,
> > +			       policy, policy_size);
> 
> Add a comment on why you are reading this back (to get the msgnums in the upper
> bits) as it's not obvious to a casual reader.

Done.

> 
> > +	if (rc < 0) {
> > +		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
> > +			rc);
> > +		return rc;
> > +	}
> > +
> > +	return 0;
> > +}
> > +EXPORT_SYMBOL_NS_GPL(cxl_event_config_msgnums, CXL);
> > +
> 
> ...
> 
> > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > index e0d511575b45..64b2e2671043 100644
> > --- a/drivers/cxl/pci.c
> > +++ b/drivers/cxl/pci.c
> > @@ -458,6 +458,138 @@ static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds)
> >  	cxlds->nr_irq_vecs = nvecs;
> >  }
> >  
> > +struct cxl_event_irq_id {
> > +	struct cxl_dev_state *cxlds;
> > +	u32 status;
> > +	unsigned int msgnum;
> msgnum is only here for freeing the interrupt - I'd rather we fixed
> that by using standard infrastructure (or adding some - see below).
> 
> status is an indirect way of allowing us to share an interrupt handler.
> You could do that by registering a trivial wrapper for each instead.
> Then all you have left is the cxl_dev_state which could be passed
> in directly as the callback parameter removing need to have this
> structure at all.  I think that might be neater.

It does prevent the alloc of this structure which I like.

I've made the change.

> 
> > +};
> > +
> > +static irqreturn_t cxl_event_int_thread(int irq, void *id)
> > +{
> > +	struct cxl_event_irq_id *cxlid = id;
> > +	struct cxl_dev_state *cxlds = cxlid->cxlds;
> > +
> > +	if (cxlid->status & CXLDEV_EVENT_STATUS_INFO)
> > +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
> > +	if (cxlid->status & CXLDEV_EVENT_STATUS_WARN)
> > +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
> > +	if (cxlid->status & CXLDEV_EVENT_STATUS_FAIL)
> > +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
> > +	if (cxlid->status & CXLDEV_EVENT_STATUS_FATAL)
> > +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
> > +	if (cxlid->status & CXLDEV_EVENT_STATUS_DYNAMIC_CAP)
> > +		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP);
> > +
> > +	return IRQ_HANDLED;
> > +}
> > +
> > +static irqreturn_t cxl_event_int_handler(int irq, void *id)
> > +{
> > +	struct cxl_event_irq_id *cxlid = id;
> > +	struct cxl_dev_state *cxlds = cxlid->cxlds;
> > +	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
> > +
> > +	if (cxlid->status & status)
> > +		return IRQ_WAKE_THREAD;
> > +	return IRQ_HANDLED;
> 
> If status not set IRQ_NONE.
> Ah. I see Dave raised this as well.

Yep done.

> 
> > +}
> 
> ...
> 
> > +static int cxl_request_event_irq(struct cxl_dev_state *cxlds,
> > +				 enum cxl_event_log_type log_type,
> > +				 u8 setting)
> > +{
> > +	struct device *dev = cxlds->dev;
> > +	struct pci_dev *pdev = to_pci_dev(dev);
> > +	struct cxl_event_irq_id *id;
> > +	unsigned int msgnum = CXL_EVENT_INT_MSGNUM(setting);
> > +	int irq;
> > +
> > +	/* Disabled irq is not an error */
> > +	if (!cxl_evt_int_is_msi(setting) || msgnum > cxlds->nr_irq_vecs) {
> 
> I don't think that second condition can occur.  The language under table 8-52
> (I think) means that it will move around if there aren't enough vectors
> (for MSI - MSI-X is more complex, but result the same).

Based on the other review this is just a bool msi_enabled which is used to
determine if this should be set up at all.

> 
> > +		dev_dbg(dev, "Event interrupt not enabled; %s %u %d\n",
> > +			cxl_event_log_type_str(CXL_EVENT_TYPE_INFO),
> > +			msgnum, cxlds->nr_irq_vecs);
> > +		return 0;
> > +	}
> > +
> > +	id = devm_kzalloc(dev, sizeof(*id), GFP_KERNEL);
> > +	if (!id)
> > +		return -ENOMEM;
> > +
> > +	id->cxlds = cxlds;
> > +	id->msgnum = msgnum;
> > +	id->status = log_type_to_status(log_type);
> > +
> > +	irq = pci_request_irq(pdev, id->msgnum, cxl_event_int_handler,
> > +			      cxl_event_int_thread, id,
> > +			      "%s:event-log-%s", dev_name(dev),
> > +			      cxl_event_log_type_str(log_type));
> > +	if (irq)
> > +		return irq;
> > +
> > +	devm_add_action_or_reset(dev, cxl_free_event_irq, id);
> 
> Hmm. no pcim_request_irq()  maybe this is the time to propose one
> (separate from this patch so we don't get delayed by that!)

Perhaps.  But not tonight...  ;-)

> 
> We discussed this way back in DOE series (I'd forgotten but lore found
> it for me).  There I suggested just calling
> devm_request_threaded_irq() directly as a work around.

Yea that works fine.  One issue is we lose the format printing of the irq name:

...
 29:  ...  PCI-MSI 100663300-edge      0000:c0:00.0:event-log-Fatal
 30:  ...  PCI-MSI 100663301-edge      0000:c0:00.0:event-log-Failure
 31:  ...  PCI-MSI 100663302-edge      0000:c0:00.0:event-log-Warning
 32:  ...  PCI-MSI 100663303-edge      0000:c0:00.0:event-log-Informational
...

Thanks,
Ira

> 
> > +	return 0;
> > +}
> > +
> > +static void cxl_event_irqsetup(struct cxl_dev_state *cxlds)
> > +{
> > +	struct device *dev = cxlds->dev;
> > +	u8 setting;
> > +
> > +	if (cxl_event_config_msgnums(cxlds))
> > +		return;
> > +
> > +	/*
> > +	 * Dynamic Capacity shares the info message number
> > +	 * Nothing to be done except check the status bit in the
> > +	 * irq thread.
> > +	 */
> > +	setting = cxlds->evt_int_policy.info_settings;
> > +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_INFO, setting))
> > +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> > +			cxl_event_log_type_str(CXL_EVENT_TYPE_INFO));
> > +
> > +	setting = cxlds->evt_int_policy.warn_settings;
> > +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_WARN, setting))
> > +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> > +			cxl_event_log_type_str(CXL_EVENT_TYPE_WARN));
> > +
> > +	setting = cxlds->evt_int_policy.failure_settings;
> > +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_FAIL, setting))
> > +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> > +			cxl_event_log_type_str(CXL_EVENT_TYPE_FAIL));
> > +
> > +	setting = cxlds->evt_int_policy.fatal_settings;
> > +	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_FATAL, setting))
> > +		dev_err(dev, "Failed to get interrupt for %s event log\n",
> > +			cxl_event_log_type_str(CXL_EVENT_TYPE_FATAL));
> > +}
>
  

Patch

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 879b228a98a0..1e6762af2a00 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -53,6 +53,8 @@  static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
 	CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
 	CXL_CMD(GET_EVENT_RECORD, 1, CXL_VARIABLE_PAYLOAD, 0),
 	CXL_CMD(CLEAR_EVENT_RECORD, CXL_VARIABLE_PAYLOAD, 0, 0),
+	CXL_CMD(GET_EVT_INT_POLICY, 0, 0x5, 0),
+	CXL_CMD(SET_EVT_INT_POLICY, 0x5, 0, 0),
 	CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
 	CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
 	CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0),
@@ -791,8 +793,8 @@  static int cxl_clear_event_record(struct cxl_dev_state *cxlds,
 				 &payload, sizeof(payload), NULL, 0);
 }
 
-static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
-				    enum cxl_event_log_type type)
+void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
+			     enum cxl_event_log_type type)
 {
 	struct cxl_get_event_payload payload;
 	u16 pl_nr;
@@ -837,6 +839,7 @@  static void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
 	} while (pl_nr > CXL_GET_EVENT_NR_RECORDS ||
 		 payload.flags & CXL_GET_EVENT_FLAG_MORE_RECORDS);
 }
+EXPORT_SYMBOL_NS_GPL(cxl_mem_get_records_log, CXL);
 
 /**
  * cxl_mem_get_event_records - Get Event Records from the device
@@ -867,6 +870,52 @@  void cxl_mem_get_event_records(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, CXL);
 
+int cxl_event_config_msgnums(struct cxl_dev_state *cxlds)
+{
+	struct cxl_event_interrupt_policy *policy = &cxlds->evt_int_policy;
+	size_t policy_size = sizeof(*policy);
+	bool retry = true;
+	int rc;
+
+	policy->info_settings = CXL_INT_MSI_MSIX;
+	policy->warn_settings = CXL_INT_MSI_MSIX;
+	policy->failure_settings = CXL_INT_MSI_MSIX;
+	policy->fatal_settings = CXL_INT_MSI_MSIX;
+	policy->dyn_cap_settings = CXL_INT_MSI_MSIX;
+
+again:
+	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_EVT_INT_POLICY,
+			       policy, policy_size, NULL, 0);
+	if (rc < 0) {
+		/*
+		 * If the device does not support dynamic capacity it may fail
+		 * the command due to an invalid payload.  Retry without
+		 * dynamic capacity.
+		 */
+		if (retry) {
+			retry = false;
+			policy->dyn_cap_settings = 0;
+			policy_size = sizeof(*policy) - sizeof(policy->dyn_cap_settings);
+			goto again;
+		}
+		dev_err(cxlds->dev, "Failed to set event interrupt policy : %d",
+			rc);
+		memset(policy, CXL_INT_NONE, sizeof(*policy));
+		return rc;
+	}
+
+	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_EVT_INT_POLICY, NULL, 0,
+			       policy, policy_size);
+	if (rc < 0) {
+		dev_err(cxlds->dev, "Failed to get event interrupt policy : %d",
+			rc);
+		return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_event_config_msgnums, CXL);
+
 /**
  * cxl_mem_get_partition_info - Get partition info
  * @cxlds: The device data for the operation
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 03da4f8f74d3..4d9c3ea30c24 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -179,6 +179,31 @@  struct cxl_endpoint_dvsec_info {
 	struct range dvsec_range[2];
 };
 
+/**
+ * Event Interrupt Policy
+ *
+ * CXL rev 3.0 section 8.2.9.2.4; Table 8-52
+ */
+enum cxl_event_int_mode {
+	CXL_INT_NONE		= 0x00,
+	CXL_INT_MSI_MSIX	= 0x01,
+	CXL_INT_FW		= 0x02
+};
+#define CXL_EVENT_INT_MODE_MASK 0x3
+#define CXL_EVENT_INT_MSGNUM(setting) (((setting) & 0xf0) >> 4)
+struct cxl_event_interrupt_policy {
+	u8 info_settings;
+	u8 warn_settings;
+	u8 failure_settings;
+	u8 fatal_settings;
+	u8 dyn_cap_settings;
+} __packed;
+
+static inline bool cxl_evt_int_is_msi(u8 setting)
+{
+	return CXL_INT_MSI_MSIX == (setting & CXL_EVENT_INT_MODE_MASK);
+}
+
 /**
  * struct cxl_dev_state - The driver device state
  *
@@ -246,6 +271,7 @@  struct cxl_dev_state {
 
 	resource_size_t component_reg_phys;
 	u64 serial;
+	struct cxl_event_interrupt_policy evt_int_policy;
 
 	struct xarray doe_mbs;
 
@@ -259,6 +285,8 @@  enum cxl_opcode {
 	CXL_MBOX_OP_RAW			= CXL_MBOX_OP_INVALID,
 	CXL_MBOX_OP_GET_EVENT_RECORD	= 0x0100,
 	CXL_MBOX_OP_CLEAR_EVENT_RECORD	= 0x0101,
+	CXL_MBOX_OP_GET_EVT_INT_POLICY	= 0x0102,
+	CXL_MBOX_OP_SET_EVT_INT_POLICY	= 0x0103,
 	CXL_MBOX_OP_GET_FW_INFO		= 0x0200,
 	CXL_MBOX_OP_ACTIVATE_FW		= 0x0202,
 	CXL_MBOX_OP_GET_SUPPORTED_LOGS	= 0x0400,
@@ -539,7 +567,10 @@  int cxl_mem_create_range_info(struct cxl_dev_state *cxlds);
 struct cxl_dev_state *cxl_dev_state_create(struct device *dev);
 void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
 void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds);
+void cxl_mem_get_records_log(struct cxl_dev_state *cxlds,
+			     enum cxl_event_log_type type);
 void cxl_mem_get_event_records(struct cxl_dev_state *cxlds);
+int cxl_event_config_msgnums(struct cxl_dev_state *cxlds);
 #ifdef CONFIG_CXL_SUSPEND
 void cxl_mem_active_inc(void);
 void cxl_mem_active_dec(void);
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index e0d511575b45..64b2e2671043 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -458,6 +458,138 @@  static void cxl_pci_alloc_irq_vectors(struct cxl_dev_state *cxlds)
 	cxlds->nr_irq_vecs = nvecs;
 }
 
+struct cxl_event_irq_id {
+	struct cxl_dev_state *cxlds;
+	u32 status;
+	unsigned int msgnum;
+};
+
+static irqreturn_t cxl_event_int_thread(int irq, void *id)
+{
+	struct cxl_event_irq_id *cxlid = id;
+	struct cxl_dev_state *cxlds = cxlid->cxlds;
+
+	if (cxlid->status & CXLDEV_EVENT_STATUS_INFO)
+		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_INFO);
+	if (cxlid->status & CXLDEV_EVENT_STATUS_WARN)
+		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_WARN);
+	if (cxlid->status & CXLDEV_EVENT_STATUS_FAIL)
+		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FAIL);
+	if (cxlid->status & CXLDEV_EVENT_STATUS_FATAL)
+		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_FATAL);
+	if (cxlid->status & CXLDEV_EVENT_STATUS_DYNAMIC_CAP)
+		cxl_mem_get_records_log(cxlds, CXL_EVENT_TYPE_DYNAMIC_CAP);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t cxl_event_int_handler(int irq, void *id)
+{
+	struct cxl_event_irq_id *cxlid = id;
+	struct cxl_dev_state *cxlds = cxlid->cxlds;
+	u32 status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
+
+	if (cxlid->status & status)
+		return IRQ_WAKE_THREAD;
+	return IRQ_HANDLED;
+}
+
+static void cxl_free_event_irq(void *id)
+{
+	struct cxl_event_irq_id *cxlid = id;
+	struct pci_dev *pdev = to_pci_dev(cxlid->cxlds->dev);
+
+	pci_free_irq(pdev, cxlid->msgnum, id);
+}
+
+static u32 log_type_to_status(enum cxl_event_log_type log_type)
+{
+	switch (log_type) {
+	case CXL_EVENT_TYPE_INFO:
+		return CXLDEV_EVENT_STATUS_INFO | CXLDEV_EVENT_STATUS_DYNAMIC_CAP;
+	case CXL_EVENT_TYPE_WARN:
+		return CXLDEV_EVENT_STATUS_WARN;
+	case CXL_EVENT_TYPE_FAIL:
+		return CXLDEV_EVENT_STATUS_FAIL;
+	case CXL_EVENT_TYPE_FATAL:
+		return CXLDEV_EVENT_STATUS_FATAL;
+	default:
+		break;
+	}
+	return 0;
+}
+
+static int cxl_request_event_irq(struct cxl_dev_state *cxlds,
+				 enum cxl_event_log_type log_type,
+				 u8 setting)
+{
+	struct device *dev = cxlds->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct cxl_event_irq_id *id;
+	unsigned int msgnum = CXL_EVENT_INT_MSGNUM(setting);
+	int irq;
+
+	/* Disabled irq is not an error */
+	if (!cxl_evt_int_is_msi(setting) || msgnum > cxlds->nr_irq_vecs) {
+		dev_dbg(dev, "Event interrupt not enabled; %s %u %d\n",
+			cxl_event_log_type_str(CXL_EVENT_TYPE_INFO),
+			msgnum, cxlds->nr_irq_vecs);
+		return 0;
+	}
+
+	id = devm_kzalloc(dev, sizeof(*id), GFP_KERNEL);
+	if (!id)
+		return -ENOMEM;
+
+	id->cxlds = cxlds;
+	id->msgnum = msgnum;
+	id->status = log_type_to_status(log_type);
+
+	irq = pci_request_irq(pdev, id->msgnum, cxl_event_int_handler,
+			      cxl_event_int_thread, id,
+			      "%s:event-log-%s", dev_name(dev),
+			      cxl_event_log_type_str(log_type));
+	if (irq)
+		return irq;
+
+	devm_add_action_or_reset(dev, cxl_free_event_irq, id);
+	return 0;
+}
+
+static void cxl_event_irqsetup(struct cxl_dev_state *cxlds)
+{
+	struct device *dev = cxlds->dev;
+	u8 setting;
+
+	if (cxl_event_config_msgnums(cxlds))
+		return;
+
+	/*
+	 * Dynamic Capacity shares the info message number
+	 * Nothing to be done except check the status bit in the
+	 * irq thread.
+	 */
+	setting = cxlds->evt_int_policy.info_settings;
+	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_INFO, setting))
+		dev_err(dev, "Failed to get interrupt for %s event log\n",
+			cxl_event_log_type_str(CXL_EVENT_TYPE_INFO));
+
+	setting = cxlds->evt_int_policy.warn_settings;
+	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_WARN, setting))
+		dev_err(dev, "Failed to get interrupt for %s event log\n",
+			cxl_event_log_type_str(CXL_EVENT_TYPE_WARN));
+
+	setting = cxlds->evt_int_policy.failure_settings;
+	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_FAIL, setting))
+		dev_err(dev, "Failed to get interrupt for %s event log\n",
+			cxl_event_log_type_str(CXL_EVENT_TYPE_FAIL));
+
+	setting = cxlds->evt_int_policy.fatal_settings;
+	if (cxl_request_event_irq(cxlds, CXL_EVENT_TYPE_FATAL, setting))
+		dev_err(dev, "Failed to get interrupt for %s event log\n",
+			cxl_event_log_type_str(CXL_EVENT_TYPE_FATAL));
+}
+
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct cxl_register_map map;
@@ -525,6 +657,7 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return rc;
 
 	cxl_pci_alloc_irq_vectors(cxlds);
+	cxl_event_irqsetup(cxlds);
 
 	cxlmd = devm_cxl_add_memdev(cxlds);
 	if (IS_ERR(cxlmd))
diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h
index 7c1ad8062792..a8204802fcca 100644
--- a/include/uapi/linux/cxl_mem.h
+++ b/include/uapi/linux/cxl_mem.h
@@ -26,6 +26,8 @@ 
 	___C(GET_SUPPORTED_LOGS, "Get Supported Logs"),                   \
 	___C(GET_EVENT_RECORD, "Get Event Record"),                       \
 	___C(CLEAR_EVENT_RECORD, "Clear Event Record"),                   \
+	___C(GET_EVT_INT_POLICY, "Get Event Interrupt Policy"),           \
+	___C(SET_EVT_INT_POLICY, "Set Event Interrupt Policy"),           \
 	___C(GET_FW_INFO, "Get FW Info"),                                 \
 	___C(GET_PARTITION_INFO, "Get Partition Information"),            \
 	___C(GET_LSA, "Get Label Storage Area"),                          \