[v12,19/25] irqchip/riscv-imsic: Add device MSI domain support for platform devices

Message ID 20240127161753.114685-20-apatel@ventanamicro.com
State New
Headers
Series Linux RISC-V AIA Support |

Commit Message

Anup Patel Jan. 27, 2024, 4:17 p.m. UTC
  The Linux platform MSI support allows per-device MSI domains so let
us add a platform irqchip driver for RISC-V IMSIC which provides a
base IRQ domain with MSI parent support for platform device domains.

This driver assumes that the IMSIC state is already initialized by
the IMSIC early driver.

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 drivers/irqchip/Makefile                   |   2 +-
 drivers/irqchip/irq-riscv-imsic-platform.c | 371 +++++++++++++++++++++
 drivers/irqchip/irq-riscv-imsic-state.h    |   2 +-
 3 files changed, 373 insertions(+), 2 deletions(-)
 create mode 100644 drivers/irqchip/irq-riscv-imsic-platform.c
  

Comments

Björn Töpel Feb. 6, 2024, 3:36 p.m. UTC | #1
Anup Patel <apatel@ventanamicro.com> writes:

> The Linux platform MSI support allows per-device MSI domains so let
> us add a platform irqchip driver for RISC-V IMSIC which provides a
> base IRQ domain with MSI parent support for platform device domains.
>
> This driver assumes that the IMSIC state is already initialized by
> the IMSIC early driver.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>

[...]

> diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
> new file mode 100644
> index 000000000000..65791a6b0727
> --- /dev/null
> +++ b/drivers/irqchip/irq-riscv-imsic-platform.c
> @@ -0,0 +1,371 @@

[...]

> +static int imsic_irq_retrigger(struct irq_data *d)
> +{
> +	struct imsic_vector *vec = irq_data_get_irq_chip_data(d);
> +	struct imsic_local_config *local;
> +
> +	if (WARN_ON(vec == NULL))
> +		return -ENOENT;
> +
> +	local = per_cpu_ptr(imsic->global.local, vec->cpu);
> +	writel(vec->local_id, local->msi_va);

Change to writel_relaxed().
  
Thomas Gleixner Feb. 16, 2024, 8:12 p.m. UTC | #2
On Sat, Jan 27 2024 at 21:47, Anup Patel wrote:
> +static int imsic_cpu_page_phys(unsigned int cpu,
> +			       unsigned int guest_index,
> +			       phys_addr_t *out_msi_pa)
> +{
> +	struct imsic_global_config *global;
> +	struct imsic_local_config *local;
> +
> +	global = &imsic->global;
> +	local = per_cpu_ptr(global->local, cpu);
> +
> +	if (BIT(global->guest_index_bits) <= guest_index)
> +		return -EINVAL;

As the callsite does not care about the return value, just make this
function boolean and return true on success.

> +	if (out_msi_pa)
> +		*out_msi_pa = local->msi_pa +
> +			      (guest_index * IMSIC_MMIO_PAGE_SZ);
> +
> +	return 0;
> +}
> +
> +static void imsic_irq_mask(struct irq_data *d)
> +{
> +	imsic_vector_mask(irq_data_get_irq_chip_data(d));
> +}
> +
> +static void imsic_irq_unmask(struct irq_data *d)
> +{
> +	imsic_vector_unmask(irq_data_get_irq_chip_data(d));
> +}
> +
> +static int imsic_irq_retrigger(struct irq_data *d)
> +{
> +	struct imsic_vector *vec = irq_data_get_irq_chip_data(d);
> +	struct imsic_local_config *local;
> +
> +	if (WARN_ON(vec == NULL))
> +		return -ENOENT;
> +
> +	local = per_cpu_ptr(imsic->global.local, vec->cpu);
> +	writel(vec->local_id, local->msi_va);
> +	return 0;
> +}
> +
> +static void imsic_irq_compose_vector_msg(struct imsic_vector *vec,
> +					 struct msi_msg *msg)
> +{
> +	phys_addr_t msi_addr;
> +	int err;
> +
> +	if (WARN_ON(vec == NULL))
> +		return;
> +
> +	err = imsic_cpu_page_phys(vec->cpu, 0, &msi_addr);
> +	if (WARN_ON(err))
> +		return;

	if (WARN_ON(!imsic_cpu_page_phys(...)))
        	return
Hmm?

> +
> +	msg->address_hi = upper_32_bits(msi_addr);
> +	msg->address_lo = lower_32_bits(msi_addr);
> +	msg->data = vec->local_id;
> +}
> +
> +static void imsic_irq_compose_msg(struct irq_data *d, struct msi_msg *msg)
> +{
> +	imsic_irq_compose_vector_msg(irq_data_get_irq_chip_data(d), msg);
> +}
> +
> +#ifdef CONFIG_SMP
> +static void imsic_msi_update_msg(struct irq_data *d, struct imsic_vector *vec)
> +{
> +	struct msi_msg msg[2] = { [1] = { }, };
> +
> +	imsic_irq_compose_vector_msg(vec, msg);
> +	irq_data_get_irq_chip(d)->irq_write_msi_msg(d, msg);
> +}
> +
> +static int imsic_irq_set_affinity(struct irq_data *d,
> +				  const struct cpumask *mask_val,
> +				  bool force)
> +{
> +	struct imsic_vector *old_vec, *new_vec;
> +	struct irq_data *pd = d->parent_data;
> +
> +	old_vec = irq_data_get_irq_chip_data(pd);
> +	if (WARN_ON(old_vec == NULL))
> +		return -ENOENT;
> +
> +	/* Get a new vector on the desired set of CPUs */
> +	new_vec = imsic_vector_alloc(old_vec->hwirq, mask_val);
> +	if (!new_vec)
> +		return -ENOSPC;
> +
> +	/* If old vector belongs to the desired CPU then do nothing */
> +	if (old_vec->cpu == new_vec->cpu) {
> +		imsic_vector_free(new_vec);
> +		return IRQ_SET_MASK_OK_DONE;
> +	}

You can spare that exercise by checking it before the allocation:

        if (cpumask_test_cpu(old_vec->cpu, mask_val))
		return IRQ_SET_MASK_OK_DONE;

> +
> +	/* Point device to the new vector */
> +	imsic_msi_update_msg(d, new_vec);

> +static int imsic_irq_domain_alloc(struct irq_domain *domain,
> +				  unsigned int virq, unsigned int nr_irqs,
> +				  void *args)
> +{
> +	struct imsic_vector *vec;
> +	int hwirq;
> +
> +	/* Legacy-MSI or multi-MSI not supported yet. */

What's legacy MSI in that context?

> +	if (nr_irqs > 1)
> +		return -ENOTSUPP;
> +
> +	hwirq = imsic_hwirq_alloc();
> +	if (hwirq < 0)
> +		return hwirq;
> +
> +	vec = imsic_vector_alloc(hwirq, cpu_online_mask);
> +	if (!vec) {
> +		imsic_hwirq_free(hwirq);
> +		return -ENOSPC;
> +	}
> +
> +	irq_domain_set_info(domain, virq, hwirq,
> +			    &imsic_irq_base_chip, vec,
> +			    handle_simple_irq, NULL, NULL);
> +	irq_set_noprobe(virq);
> +	irq_set_affinity(virq, cpu_online_mask);
> +
> +	/*
> +	 * IMSIC does not implement irq_disable() so Linux interrupt
> +	 * subsystem will take a lazy approach for disabling an IMSIC
> +	 * interrupt. This means IMSIC interrupts are left unmasked
> +	 * upon system suspend and interrupts are not processed
> +	 * immediately upon system wake up. To tackle this, we disable
> +	 * the lazy approach for all IMSIC interrupts.

Why? Lazy works perfectly fine even w/o an irq_disable() callback.

> +	 */
> +	irq_set_status_flags(virq, IRQ_DISABLE_UNLAZY);

> +
> +#define MATCH_PLATFORM_MSI		BIT(DOMAIN_BUS_PLATFORM_MSI)

You really love macro indirections :)

> +static const struct msi_parent_ops imsic_msi_parent_ops = {
> +	.supported_flags	= MSI_GENERIC_FLAGS_MASK,
> +	.required_flags		= MSI_FLAG_USE_DEF_DOM_OPS |
> +				  MSI_FLAG_USE_DEF_CHIP_OPS,
> +	.bus_select_token	= DOMAIN_BUS_NEXUS,
> +	.bus_select_mask	= MATCH_PLATFORM_MSI,
> +	.init_dev_msi_info	= imsic_init_dev_msi_info,
> +};
> +
> +int imsic_irqdomain_init(void)
> +{
> +	struct imsic_global_config *global;
> +
> +	if (!imsic || !imsic->fwnode) {
> +		pr_err("early driver not probed\n");
> +		return -ENODEV;
> +	}
> +
> +	if (imsic->base_domain) {
> +		pr_err("%pfwP: irq domain already created\n", imsic->fwnode);
> +		return -ENODEV;
> +	}
> +
> +	global = &imsic->global;

Please move that assignment down to the usage site. Here it's just a
distraction.

> +	/* Create Base IRQ domain */
> +	imsic->base_domain = irq_domain_create_tree(imsic->fwnode,
> +					&imsic_base_domain_ops, imsic);
> +	if (!imsic->base_domain) {
> +		pr_err("%pfwP: failed to create IMSIC base domain\n",
> +			imsic->fwnode);
> +		return -ENOMEM;
> +	}
> +	imsic->base_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
> +	imsic->base_domain->msi_parent_ops = &imsic_msi_parent_ops;

Thanks,

        tglx
  
Anup Patel Feb. 19, 2024, 4:10 a.m. UTC | #3
On Sat, Feb 17, 2024 at 1:42 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Sat, Jan 27 2024 at 21:47, Anup Patel wrote:
> > +static int imsic_cpu_page_phys(unsigned int cpu,
> > +                            unsigned int guest_index,
> > +                            phys_addr_t *out_msi_pa)
> > +{
> > +     struct imsic_global_config *global;
> > +     struct imsic_local_config *local;
> > +
> > +     global = &imsic->global;
> > +     local = per_cpu_ptr(global->local, cpu);
> > +
> > +     if (BIT(global->guest_index_bits) <= guest_index)
> > +             return -EINVAL;
>
> As the callsite does not care about the return value, just make this
> function boolean and return true on success.

Okay, I will update.

>
> > +     if (out_msi_pa)
> > +             *out_msi_pa = local->msi_pa +
> > +                           (guest_index * IMSIC_MMIO_PAGE_SZ);
> > +
> > +     return 0;
> > +}
> > +
> > +static void imsic_irq_mask(struct irq_data *d)
> > +{
> > +     imsic_vector_mask(irq_data_get_irq_chip_data(d));
> > +}
> > +
> > +static void imsic_irq_unmask(struct irq_data *d)
> > +{
> > +     imsic_vector_unmask(irq_data_get_irq_chip_data(d));
> > +}
> > +
> > +static int imsic_irq_retrigger(struct irq_data *d)
> > +{
> > +     struct imsic_vector *vec = irq_data_get_irq_chip_data(d);
> > +     struct imsic_local_config *local;
> > +
> > +     if (WARN_ON(vec == NULL))
> > +             return -ENOENT;
> > +
> > +     local = per_cpu_ptr(imsic->global.local, vec->cpu);
> > +     writel(vec->local_id, local->msi_va);
> > +     return 0;
> > +}
> > +
> > +static void imsic_irq_compose_vector_msg(struct imsic_vector *vec,
> > +                                      struct msi_msg *msg)
> > +{
> > +     phys_addr_t msi_addr;
> > +     int err;
> > +
> > +     if (WARN_ON(vec == NULL))
> > +             return;
> > +
> > +     err = imsic_cpu_page_phys(vec->cpu, 0, &msi_addr);
> > +     if (WARN_ON(err))
> > +             return;
>
>         if (WARN_ON(!imsic_cpu_page_phys(...)))
>                 return
> Hmm?

Okay, I will update like you suggested.

>
> > +
> > +     msg->address_hi = upper_32_bits(msi_addr);
> > +     msg->address_lo = lower_32_bits(msi_addr);
> > +     msg->data = vec->local_id;
> > +}
> > +
> > +static void imsic_irq_compose_msg(struct irq_data *d, struct msi_msg *msg)
> > +{
> > +     imsic_irq_compose_vector_msg(irq_data_get_irq_chip_data(d), msg);
> > +}
> > +
> > +#ifdef CONFIG_SMP
> > +static void imsic_msi_update_msg(struct irq_data *d, struct imsic_vector *vec)
> > +{
> > +     struct msi_msg msg[2] = { [1] = { }, };
> > +
> > +     imsic_irq_compose_vector_msg(vec, msg);
> > +     irq_data_get_irq_chip(d)->irq_write_msi_msg(d, msg);
> > +}
> > +
> > +static int imsic_irq_set_affinity(struct irq_data *d,
> > +                               const struct cpumask *mask_val,
> > +                               bool force)
> > +{
> > +     struct imsic_vector *old_vec, *new_vec;
> > +     struct irq_data *pd = d->parent_data;
> > +
> > +     old_vec = irq_data_get_irq_chip_data(pd);
> > +     if (WARN_ON(old_vec == NULL))
> > +             return -ENOENT;
> > +
> > +     /* Get a new vector on the desired set of CPUs */
> > +     new_vec = imsic_vector_alloc(old_vec->hwirq, mask_val);
> > +     if (!new_vec)
> > +             return -ENOSPC;
> > +
> > +     /* If old vector belongs to the desired CPU then do nothing */
> > +     if (old_vec->cpu == new_vec->cpu) {
> > +             imsic_vector_free(new_vec);
> > +             return IRQ_SET_MASK_OK_DONE;
> > +     }
>
> You can spare that exercise by checking it before the allocation:
>
>         if (cpumask_test_cpu(old_vec->cpu, mask_val))
>                 return IRQ_SET_MASK_OK_DONE;

Okay, I will update.

>
> > +
> > +     /* Point device to the new vector */
> > +     imsic_msi_update_msg(d, new_vec);
>
> > +static int imsic_irq_domain_alloc(struct irq_domain *domain,
> > +                               unsigned int virq, unsigned int nr_irqs,
> > +                               void *args)
> > +{
> > +     struct imsic_vector *vec;
> > +     int hwirq;
> > +
> > +     /* Legacy-MSI or multi-MSI not supported yet. */
>
> What's legacy MSI in that context?

Legacy MSI is the MSI support defined in PCI v2.2, where the
number of MSIs allocated by a device is 1, 2, 4, 8, 16, or 32
and the data written is <data_word> + <irqnum>.

>
> > +     if (nr_irqs > 1)
> > +             return -ENOTSUPP;
> > +
> > +     hwirq = imsic_hwirq_alloc();
> > +     if (hwirq < 0)
> > +             return hwirq;
> > +
> > +     vec = imsic_vector_alloc(hwirq, cpu_online_mask);
> > +     if (!vec) {
> > +             imsic_hwirq_free(hwirq);
> > +             return -ENOSPC;
> > +     }
> > +
> > +     irq_domain_set_info(domain, virq, hwirq,
> > +                         &imsic_irq_base_chip, vec,
> > +                         handle_simple_irq, NULL, NULL);
> > +     irq_set_noprobe(virq);
> > +     irq_set_affinity(virq, cpu_online_mask);
> > +
> > +     /*
> > +      * IMSIC does not implement irq_disable() so Linux interrupt
> > +      * subsystem will take a lazy approach for disabling an IMSIC
> > +      * interrupt. This means IMSIC interrupts are left unmasked
> > +      * upon system suspend and interrupts are not processed
> > +      * immediately upon system wake up. To tackle this, we disable
> > +      * the lazy approach for all IMSIC interrupts.
>
> Why? Lazy works perfectly fine even w/o an irq_disable() callback.

This was suggested by SiFive folks. I am also not sure why we
need this. For now, I will drop this and bring it back as a separate
patch if required.

>
> > +      */
> > +     irq_set_status_flags(virq, IRQ_DISABLE_UNLAZY);
>
> > +
> > +#define MATCH_PLATFORM_MSI           BIT(DOMAIN_BUS_PLATFORM_MSI)
>
> You really love macro indirections :)

This is to be consistent with MATCH_PCI_MSI introduced by the
subsequent patch.

Also, this is inspired from your ARM GIC patches.
https://lore.kernel.org/linux-arm-kernel/20221121140049.038269899@linutronix.de/
https://lore.kernel.org/linux-arm-kernel/20221121140049.112451419@linutronix.de/
https://lore.kernel.org/linux-arm-kernel/20221121140049.237988384@linutronix.de/
https://lore.kernel.org/linux-arm-kernel/20221121140049.941784867@linutronix.de/

>
> > +static const struct msi_parent_ops imsic_msi_parent_ops = {
> > +     .supported_flags        = MSI_GENERIC_FLAGS_MASK,
> > +     .required_flags         = MSI_FLAG_USE_DEF_DOM_OPS |
> > +                               MSI_FLAG_USE_DEF_CHIP_OPS,
> > +     .bus_select_token       = DOMAIN_BUS_NEXUS,
> > +     .bus_select_mask        = MATCH_PLATFORM_MSI,
> > +     .init_dev_msi_info      = imsic_init_dev_msi_info,
> > +};
> > +
> > +int imsic_irqdomain_init(void)
> > +{
> > +     struct imsic_global_config *global;
> > +
> > +     if (!imsic || !imsic->fwnode) {
> > +             pr_err("early driver not probed\n");
> > +             return -ENODEV;
> > +     }
> > +
> > +     if (imsic->base_domain) {
> > +             pr_err("%pfwP: irq domain already created\n", imsic->fwnode);
> > +             return -ENODEV;
> > +     }
> > +
> > +     global = &imsic->global;
>
> Please move that assignment down to the usage site. Here it's just a
> distraction.

Okay, I will update.

>
> > +     /* Create Base IRQ domain */
> > +     imsic->base_domain = irq_domain_create_tree(imsic->fwnode,
> > +                                     &imsic_base_domain_ops, imsic);
> > +     if (!imsic->base_domain) {
> > +             pr_err("%pfwP: failed to create IMSIC base domain\n",
> > +                     imsic->fwnode);
> > +             return -ENOMEM;
> > +     }
> > +     imsic->base_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
> > +     imsic->base_domain->msi_parent_ops = &imsic_msi_parent_ops;
>

Regards,
Anup
  

Patch

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index d714724387ce..abca445a3229 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -95,7 +95,7 @@  obj-$(CONFIG_QCOM_MPM)			+= irq-qcom-mpm.o
 obj-$(CONFIG_CSKY_MPINTC)		+= irq-csky-mpintc.o
 obj-$(CONFIG_CSKY_APB_INTC)		+= irq-csky-apb-intc.o
 obj-$(CONFIG_RISCV_INTC)		+= irq-riscv-intc.o
-obj-$(CONFIG_RISCV_IMSIC)		+= irq-riscv-imsic-state.o irq-riscv-imsic-early.o
+obj-$(CONFIG_RISCV_IMSIC)		+= irq-riscv-imsic-state.o irq-riscv-imsic-early.o irq-riscv-imsic-platform.o
 obj-$(CONFIG_SIFIVE_PLIC)		+= irq-sifive-plic.o
 obj-$(CONFIG_IMX_IRQSTEER)		+= irq-imx-irqsteer.o
 obj-$(CONFIG_IMX_INTMUX)		+= irq-imx-intmux.o
diff --git a/drivers/irqchip/irq-riscv-imsic-platform.c b/drivers/irqchip/irq-riscv-imsic-platform.c
new file mode 100644
index 000000000000..65791a6b0727
--- /dev/null
+++ b/drivers/irqchip/irq-riscv-imsic-platform.c
@@ -0,0 +1,371 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+
+#define pr_fmt(fmt) "riscv-imsic: " fmt
+#include <linux/bitmap.h>
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqdomain.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+
+#include "irq-riscv-imsic-state.h"
+
+/*
+ * Look up the physical address of an IMSIC MMIO page of @cpu.
+ *
+ * The page is located @guest_index pages (IMSIC_MMIO_PAGE_SZ bytes each)
+ * past the per-CPU msi_pa base. The address is stored through
+ * @out_msi_pa when that pointer is non-NULL.
+ *
+ * Returns 0 on success, or -EINVAL when @guest_index is not smaller than
+ * BIT(guest_index_bits) of the global configuration.
+ */
+static int imsic_cpu_page_phys(unsigned int cpu,
+			       unsigned int guest_index,
+			       phys_addr_t *out_msi_pa)
+{
+	struct imsic_global_config *gc = &imsic->global;
+	struct imsic_local_config *lc;
+
+	/* Reject guest indices outside of the configured range */
+	if (guest_index >= BIT(gc->guest_index_bits))
+		return -EINVAL;
+
+	/* Caller only wanted the range check */
+	if (!out_msi_pa)
+		return 0;
+
+	lc = per_cpu_ptr(gc->local, cpu);
+	*out_msi_pa = lc->msi_pa + (guest_index * IMSIC_MMIO_PAGE_SZ);
+
+	return 0;
+}
+
+/* .irq_mask callback: mask the IMSIC vector stored as chip data of @d */
+static void imsic_irq_mask(struct irq_data *d)
+{
+	struct imsic_vector *vec = irq_data_get_irq_chip_data(d);
+
+	imsic_vector_mask(vec);
+}
+
+/* .irq_unmask callback: unmask the IMSIC vector stored as chip data of @d */
+static void imsic_irq_unmask(struct irq_data *d)
+{
+	struct imsic_vector *vec = irq_data_get_irq_chip_data(d);
+
+	imsic_vector_unmask(vec);
+}
+
+/*
+ * .irq_retrigger callback: re-raise the interrupt by writing the
+ * vector's local_id to the MSI page (msi_va) of the CPU that owns the
+ * vector.
+ *
+ * Returns 0 on success or -ENOENT when @d carries no vector.
+ */
+static int imsic_irq_retrigger(struct irq_data *d)
+{
+	struct imsic_vector *vec = irq_data_get_irq_chip_data(d);
+	struct imsic_local_config *local;
+
+	if (WARN_ON(vec == NULL))
+		return -ENOENT;
+
+	local = per_cpu_ptr(imsic->global.local, vec->cpu);
+
+	/*
+	 * No earlier memory write in this function has to be ordered
+	 * before the MMIO write, so the relaxed accessor is sufficient.
+	 */
+	writel_relaxed(vec->local_id, local->msi_va);
+	return 0;
+}
+
+/*
+ * Fill @msg so that it targets @vec: the address is the MSI page of the
+ * vector's CPU at guest index 0 and the data is the vector's local_id.
+ *
+ * @msg is left untouched (after a WARN) when @vec is NULL or the page
+ * address lookup fails.
+ */
+static void imsic_irq_compose_vector_msg(struct imsic_vector *vec,
+					 struct msi_msg *msg)
+{
+	phys_addr_t pa;
+
+	if (WARN_ON(!vec))
+		return;
+
+	if (WARN_ON(imsic_cpu_page_phys(vec->cpu, 0, &pa)))
+		return;
+
+	msg->data = vec->local_id;
+	msg->address_lo = lower_32_bits(pa);
+	msg->address_hi = upper_32_bits(pa);
+}
+
+/* .irq_compose_msi_msg callback: compose @msg for the vector attached to @d */
+static void imsic_irq_compose_msg(struct irq_data *d, struct msi_msg *msg)
+{
+	struct imsic_vector *vec = irq_data_get_irq_chip_data(d);
+
+	imsic_irq_compose_vector_msg(vec, msg);
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Compose the MSI message for @vec and hand it to the
+ * irq_write_msi_msg() callback of the chip behind @d.
+ */
+static void imsic_msi_update_msg(struct irq_data *d, struct imsic_vector *vec)
+{
+	struct msi_msg msg[2] = { [1] = { }, };
+	struct irq_chip *chip;
+
+	imsic_irq_compose_vector_msg(vec, &msg[0]);
+
+	chip = irq_data_get_irq_chip(d);
+	chip->irq_write_msi_msg(d, msg);
+}
+
+/*
+ * irq_set_affinity handler installed on the per-device MSI chip by
+ * imsic_init_dev_msi_info(). @d is the device level irq data; its
+ * parent data carries the IMSIC vector as chip data.
+ *
+ * Returns IRQ_SET_MASK_OK_DONE on success (including the case where
+ * nothing has to move), -ENOENT when the parent has no vector attached
+ * and -ENOSPC when no vector could be allocated for @mask_val.
+ */
+static int imsic_irq_set_affinity(struct irq_data *d,
+				  const struct cpumask *mask_val,
+				  bool force)
+{
+	struct imsic_vector *old_vec, *new_vec;
+	struct irq_data *pd = d->parent_data;
+
+	old_vec = irq_data_get_irq_chip_data(pd);
+	if (WARN_ON(old_vec == NULL))
+		return -ENOENT;
+
+	/*
+	 * If the old vector already sits on a CPU of the requested mask
+	 * there is nothing to move. Check this up front so that no vector
+	 * is allocated only to be freed again.
+	 */
+	if (cpumask_test_cpu(old_vec->cpu, mask_val))
+		return IRQ_SET_MASK_OK_DONE;
+
+	/* Get a new vector on the desired set of CPUs */
+	new_vec = imsic_vector_alloc(old_vec->hwirq, mask_val);
+	if (!new_vec)
+		return -ENOSPC;
+
+	/* Point device to the new vector */
+	imsic_msi_update_msg(d, new_vec);
+
+	/* Update irq descriptors with the new vector */
+	pd->chip_data = new_vec;
+
+	/* Update effective affinity of parent irq data */
+	irq_data_update_effective_affinity(pd, cpumask_of(new_vec->cpu));
+
+	/* Move state of the old vector to the new vector */
+	imsic_vector_move(old_vec, new_vec);
+
+	return IRQ_SET_MASK_OK_DONE;
+}
+#endif
+
+/*
+ * irq_chip used for every interrupt of the IMSIC base domain (see
+ * imsic_irq_domain_alloc()). No .irq_set_affinity is set here; under
+ * CONFIG_SMP imsic_init_dev_msi_info() installs imsic_irq_set_affinity
+ * on the per-device MSI chip instead.
+ */
+static struct irq_chip imsic_irq_base_chip = {
+	.name			= "IMSIC",
+	.irq_mask		= imsic_irq_mask,
+	.irq_unmask		= imsic_irq_unmask,
+	.irq_retrigger		= imsic_irq_retrigger,
+	.irq_compose_msi_msg	= imsic_irq_compose_msg,
+	.flags			= IRQCHIP_SKIP_SET_WAKE |
+				  IRQCHIP_MASK_ON_SUSPEND,
+};
+
+/*
+ * .alloc callback of the IMSIC base domain.
+ *
+ * Allocates one hwirq and one vector on an online CPU, then binds them
+ * to @virq using imsic_irq_base_chip and handle_simple_irq.
+ *
+ * Returns 0 on success, -ENOTSUPP when more than one interrupt is
+ * requested, the error from imsic_hwirq_alloc() when no hwirq is
+ * available, or -ENOSPC when no vector could be allocated.
+ */
+static int imsic_irq_domain_alloc(struct irq_domain *domain,
+				  unsigned int virq, unsigned int nr_irqs,
+				  void *args)
+{
+	struct imsic_vector *vec;
+	int hwirq;
+
+	/* Allocating several interrupts at once (multi-MSI) is not supported yet. */
+	if (nr_irqs > 1)
+		return -ENOTSUPP;
+
+	hwirq = imsic_hwirq_alloc();
+	if (hwirq < 0)
+		return hwirq;
+
+	vec = imsic_vector_alloc(hwirq, cpu_online_mask);
+	if (!vec) {
+		/* Do not leak the hwirq when no vector is available */
+		imsic_hwirq_free(hwirq);
+		return -ENOSPC;
+	}
+
+	irq_domain_set_info(domain, virq, hwirq,
+			    &imsic_irq_base_chip, vec,
+			    handle_simple_irq, NULL, NULL);
+	irq_set_noprobe(virq);
+	irq_set_affinity(virq, cpu_online_mask);
+
+	/*
+	 * Lazy disable is intentionally left enabled: it works without an
+	 * irq_disable() callback, so IRQ_DISABLE_UNLAZY is not set here.
+	 */
+
+	return 0;
+}
+
+/*
+ * .free callback of the IMSIC base domain: release the vector and the
+ * hwirq bound to @virq, then let the parent free its part.
+ */
+static void imsic_irq_domain_free(struct irq_domain *domain,
+				  unsigned int virq,
+				  unsigned int nr_irqs)
+{
+	struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);
+	struct imsic_vector *vec = irq_data_get_irq_chip_data(irqd);
+
+	imsic_vector_free(vec);
+	imsic_hwirq_free(irqd->hwirq);
+	irq_domain_free_irqs_parent(domain, virq, nr_irqs);
+}
+
+/*
+ * .select callback of the IMSIC base domain.
+ *
+ * Returns 1 when @fwspec names this domain's fwnode with no parameters
+ * and @bus_token is either the parent ops' bus_select_token or one of
+ * the tokens in bus_select_mask; returns 0 otherwise.
+ */
+static int imsic_irq_domain_select(struct irq_domain *domain,
+				   struct irq_fwspec *fwspec,
+				   enum irq_domain_bus_token bus_token)
+{
+	const struct msi_parent_ops *pops = domain->msi_parent_ops;
+	u32 token_bit;
+
+	/* Only lookups for our own fwnode without parameters can match */
+	if (fwspec->fwnode != domain->fwnode)
+		return 0;
+	if (fwspec->param_count != 0)
+		return 0;
+
+	/* Handle pure domain searches */
+	if (bus_token == pops->bus_select_token)
+		return 1;
+
+	token_bit = BIT(bus_token);
+	if (pops->bus_select_mask & token_bit)
+		return 1;
+
+	return 0;
+}
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+/*
+ * .debug_show callback: print the details of the vector attached to
+ * @irqd, or the vector summary when no irq data is given.
+ */
+static void imsic_irq_debug_show(struct seq_file *m, struct irq_domain *d,
+				 struct irq_data *irqd, int ind)
+{
+	if (irqd)
+		imsic_vector_debug_show(m, irq_data_get_irq_chip_data(irqd), ind);
+	else
+		imsic_vector_debug_show_summary(m, ind);
+}
+#endif
+
+/*
+ * Operations of the IMSIC base IRQ domain created by
+ * imsic_irqdomain_init(). The debug_show hook is only built when
+ * CONFIG_GENERIC_IRQ_DEBUGFS is enabled.
+ */
+static const struct irq_domain_ops imsic_base_domain_ops = {
+	.alloc		= imsic_irq_domain_alloc,
+	.free		= imsic_irq_domain_free,
+	.select		= imsic_irq_domain_select,
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+	.debug_show	= imsic_irq_debug_show,
+#endif
+};
+
+/*
+ * init_dev_msi_info callback of imsic_msi_parent_ops.
+ *
+ * Validates that @real_parent is the DOMAIN_BUS_NEXUS domain and that
+ * @info describes a supported per-device domain type, installs the
+ * affinity (CONFIG_SMP only) and retrigger handlers on @info->chip, and
+ * finally restricts @info->flags to the parent's supported flags while
+ * forcing its required flags.
+ *
+ * Returns true when @info was set up, false (after a one-time WARN) when
+ * the parent or the target bus token is not supported.
+ */
+static bool imsic_init_dev_msi_info(struct device *dev,
+				    struct irq_domain *domain,
+				    struct irq_domain *real_parent,
+				    struct msi_domain_info *info)
+{
+	const struct msi_parent_ops *pops = real_parent->msi_parent_ops;
+
+	/* MSI parent domain specific settings */
+	switch (real_parent->bus_token) {
+	case DOMAIN_BUS_NEXUS:
+		/* The parent we are called for must be the real parent */
+		if (WARN_ON_ONCE(domain != real_parent))
+			return false;
+#ifdef CONFIG_SMP
+		info->chip->irq_set_affinity = imsic_irq_set_affinity;
+#endif
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return false;
+	}
+
+	/* Is the target supported? */
+	switch (info->bus_token) {
+	case DOMAIN_BUS_DEVICE_IMS:
+		/*
+		 * Per device IMS should never have any MSI feature bits
+		 * set. Its sole purpose is to create a dumb interrupt
+		 * chip which has a device specific irq_write_msi_msg()
+		 * callback.
+		 */
+		if (WARN_ON_ONCE(info->flags))
+			return false;
+
+		/* Core managed MSI descriptors */
+		info->flags |= MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS |
+			       MSI_FLAG_FREE_MSI_DESCS;
+		break;
+	case DOMAIN_BUS_WIRED_TO_MSI:
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return false;
+	}
+
+	/* Use the hierarchical chip operation for re-trigger */
+	info->chip->irq_retrigger = irq_chip_retrigger_hierarchy;
+
+	/*
+	 * Mask out the domain specific MSI feature flags which are not
+	 * supported by the real parent.
+	 */
+	info->flags &= pops->supported_flags;
+
+	/* Enforce the required flags */
+	info->flags |= pops->required_flags;
+
+	return true;
+}
+
+/* Bus token bit accepted by imsic_irq_domain_select() via bus_select_mask */
+#define MATCH_PLATFORM_MSI		BIT(DOMAIN_BUS_PLATFORM_MSI)
+
+/*
+ * MSI parent operations attached to the IMSIC base domain in
+ * imsic_irqdomain_init(). supported_flags and required_flags are
+ * applied to the per-device domain info by imsic_init_dev_msi_info().
+ */
+static const struct msi_parent_ops imsic_msi_parent_ops = {
+	.supported_flags	= MSI_GENERIC_FLAGS_MASK,
+	.required_flags		= MSI_FLAG_USE_DEF_DOM_OPS |
+				  MSI_FLAG_USE_DEF_CHIP_OPS,
+	.bus_select_token	= DOMAIN_BUS_NEXUS,
+	.bus_select_mask	= MATCH_PLATFORM_MSI,
+	.init_dev_msi_info	= imsic_init_dev_msi_info,
+};
+
+/*
+ * Create the IMSIC base IRQ domain and mark it as an MSI parent.
+ *
+ * Relies on the global imsic state (including its fwnode) having been
+ * set up beforehand.
+ *
+ * Returns 0 on success, -ENODEV when the state is missing or the base
+ * domain already exists, and -ENOMEM when the domain cannot be created.
+ */
+int imsic_irqdomain_init(void)
+{
+	struct imsic_global_config *global;
+	struct irq_domain *base;
+
+	if (!imsic || !imsic->fwnode) {
+		pr_err("early driver not probed\n");
+		return -ENODEV;
+	}
+
+	if (imsic->base_domain) {
+		pr_err("%pfwP: irq domain already created\n", imsic->fwnode);
+		return -ENODEV;
+	}
+
+	/* Create Base IRQ domain */
+	base = irq_domain_create_tree(imsic->fwnode,
+				      &imsic_base_domain_ops, imsic);
+	imsic->base_domain = base;
+	if (!base) {
+		pr_err("%pfwP: failed to create IMSIC base domain\n",
+			imsic->fwnode);
+		return -ENOMEM;
+	}
+
+	/* Turn the base domain into an MSI parent for per-device domains */
+	base->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT;
+	base->msi_parent_ops = &imsic_msi_parent_ops;
+
+	irq_domain_update_bus_token(base, DOMAIN_BUS_NEXUS);
+
+	/* Report the configuration which is now in use */
+	global = &imsic->global;
+	pr_info("%pfwP:  hart-index-bits: %d,  guest-index-bits: %d\n",
+		imsic->fwnode, global->hart_index_bits, global->guest_index_bits);
+	pr_info("%pfwP: group-index-bits: %d, group-index-shift: %d\n",
+		imsic->fwnode, global->group_index_bits, global->group_index_shift);
+	pr_info("%pfwP: per-CPU IDs %d at base PPN %pa\n",
+		imsic->fwnode, global->nr_ids, &global->base_addr);
+	pr_info("%pfwP: total %d interrupts available\n",
+		imsic->fwnode, imsic->nr_hwirqs);
+
+	return 0;
+}
+
+/*
+ * Platform driver probe: refuse devices whose fwnode differs from the
+ * one recorded in the global imsic state, otherwise create the IRQ
+ * domain. Returns -ENODEV on mismatch or the result of
+ * imsic_irqdomain_init().
+ */
+static int imsic_platform_probe(struct platform_device *pdev)
+{
+	struct fwnode_handle *fwnode = pdev->dev.fwnode;
+
+	if (imsic && imsic->fwnode != fwnode) {
+		dev_err(&pdev->dev, "fwnode mismatch\n");
+		return -ENODEV;
+	}
+
+	return imsic_irqdomain_init();
+}
+
+/* Device tree compatibles handled by this driver; the empty entry ends the table */
+static const struct of_device_id imsic_platform_match[] = {
+	{ .compatible = "riscv,imsics" },
+	{}
+};
+
+/*
+ * Platform driver definition: matches via imsic_platform_match and
+ * probes with imsic_platform_probe(). No remove callback is provided.
+ */
+static struct platform_driver imsic_platform_driver = {
+	.driver = {
+		.name		= "riscv-imsic",
+		.of_match_table	= imsic_platform_match,
+	},
+	.probe = imsic_platform_probe,
+};
+builtin_platform_driver(imsic_platform_driver);
diff --git a/drivers/irqchip/irq-riscv-imsic-state.h b/drivers/irqchip/irq-riscv-imsic-state.h
index de83b649221c..c76cab08bf78 100644
--- a/drivers/irqchip/irq-riscv-imsic-state.h
+++ b/drivers/irqchip/irq-riscv-imsic-state.h
@@ -62,7 +62,6 @@  struct imsic_priv {
 
 	/* IRQ domains (created by platform driver) */
 	struct irq_domain *base_domain;
-	struct irq_domain *plat_domain;
 };
 
 extern struct imsic_priv *imsic;
@@ -101,5 +100,6 @@  void imsic_hwirq_free(unsigned int hwirq);
 void imsic_state_online(void);
 void imsic_state_offline(void);
 int imsic_setup_state(struct fwnode_handle *fwnode);
+int imsic_irqdomain_init(void);
 
 #endif