[v13,06/13] irqchip: Add RISC-V incoming MSI controller early driver

Message ID 20240220060718.823229-7-apatel@ventanamicro.com
State New
Headers
Series Linux RISC-V AIA Support |

Commit Message

Anup Patel Feb. 20, 2024, 6:07 a.m. UTC
  The RISC-V advanced interrupt architecture (AIA) specification
defines a new MSI controller called incoming message signalled
interrupt controller (IMSIC) which manages MSI on per-HART (or
per-CPU) basis. It also supports IPIs as software injected MSIs.
(For more details refer https://github.com/riscv/riscv-aia)

Let us add an early irqchip driver for RISC-V IMSIC which sets
up the IMSIC state and provide IPIs.

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 drivers/irqchip/Kconfig                 |   7 +
 drivers/irqchip/Makefile                |   1 +
 drivers/irqchip/irq-riscv-imsic-early.c | 213 ++++++
 drivers/irqchip/irq-riscv-imsic-state.c | 906 ++++++++++++++++++++++++
 drivers/irqchip/irq-riscv-imsic-state.h |  98 +++
 include/linux/irqchip/riscv-imsic.h     |  87 +++
 6 files changed, 1312 insertions(+)
 create mode 100644 drivers/irqchip/irq-riscv-imsic-early.c
 create mode 100644 drivers/irqchip/irq-riscv-imsic-state.c
 create mode 100644 drivers/irqchip/irq-riscv-imsic-state.h
 create mode 100644 include/linux/irqchip/riscv-imsic.h
  

Comments

Björn Töpel Feb. 20, 2024, 11:52 a.m. UTC | #1
Anup,

This version is so much easier to follow! Thanks a lot for then
cleanups/design changes.

A bunch of nits, and a major one, below.

Anup Patel <apatel@ventanamicro.com> writes:

> The RISC-V advanced interrupt architecture (AIA) specification
> defines a new MSI controller called incoming message signalled
> interrupt controller (IMSIC) which manages MSI on per-HART (or
> per-CPU) basis. It also supports IPIs as software injected MSIs.
> (For more details refer https://github.com/riscv/riscv-aia)
>
> Let us add an early irqchip driver for RISC-V IMSIC which sets
> up the IMSIC state and provide IPIs.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>  drivers/irqchip/Kconfig                 |   7 +
>  drivers/irqchip/Makefile                |   1 +
>  drivers/irqchip/irq-riscv-imsic-early.c | 213 ++++++
>  drivers/irqchip/irq-riscv-imsic-state.c | 906 ++++++++++++++++++++++++
>  drivers/irqchip/irq-riscv-imsic-state.h |  98 +++
>  include/linux/irqchip/riscv-imsic.h     |  87 +++
>  6 files changed, 1312 insertions(+)
>  create mode 100644 drivers/irqchip/irq-riscv-imsic-early.c
>  create mode 100644 drivers/irqchip/irq-riscv-imsic-state.c
>  create mode 100644 drivers/irqchip/irq-riscv-imsic-state.h
>  create mode 100644 include/linux/irqchip/riscv-imsic.h
>
> diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> index f7149d0f3d45..85f86e31c996 100644
> --- a/drivers/irqchip/Kconfig
> +++ b/drivers/irqchip/Kconfig
> @@ -546,6 +546,13 @@ config SIFIVE_PLIC
>  	select IRQ_DOMAIN_HIERARCHY
>  	select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
>  
> +config RISCV_IMSIC
> +	bool
> +	depends on RISCV
> +	select IRQ_DOMAIN_HIERARCHY
> +	select GENERIC_IRQ_MATRIX_ALLOCATOR
> +	select GENERIC_MSI_IRQ
> +
>  config EXYNOS_IRQ_COMBINER
>  	bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST
>  	depends on (ARCH_EXYNOS && ARM) || COMPILE_TEST
> diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
> index ffd945fe71aa..d714724387ce 100644
> --- a/drivers/irqchip/Makefile
> +++ b/drivers/irqchip/Makefile
> @@ -95,6 +95,7 @@ obj-$(CONFIG_QCOM_MPM)			+= irq-qcom-mpm.o
>  obj-$(CONFIG_CSKY_MPINTC)		+= irq-csky-mpintc.o
>  obj-$(CONFIG_CSKY_APB_INTC)		+= irq-csky-apb-intc.o
>  obj-$(CONFIG_RISCV_INTC)		+= irq-riscv-intc.o
> +obj-$(CONFIG_RISCV_IMSIC)		+= irq-riscv-imsic-state.o irq-riscv-imsic-early.o
>  obj-$(CONFIG_SIFIVE_PLIC)		+= irq-sifive-plic.o
>  obj-$(CONFIG_IMX_IRQSTEER)		+= irq-imx-irqsteer.o
>  obj-$(CONFIG_IMX_INTMUX)		+= irq-imx-intmux.o
> diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c
> new file mode 100644
> index 000000000000..32fe428b1c19
> --- /dev/null
> +++ b/drivers/irqchip/irq-riscv-imsic-early.c
> @@ -0,0 +1,213 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + * Copyright (C) 2022 Ventana Micro Systems Inc.
> + */
> +
> +#define pr_fmt(fmt) "riscv-imsic: " fmt
> +#include <linux/cpu.h>
> +#include <linux/interrupt.h>
> +#include <linux/io.h>
> +#include <linux/irq.h>
> +#include <linux/irqchip.h>
> +#include <linux/irqchip/chained_irq.h>
> +#include <linux/module.h>
> +#include <linux/spinlock.h>
> +#include <linux/smp.h>
> +
> +#include "irq-riscv-imsic-state.h"
> +
> +static int imsic_parent_irq;
> +
> +#ifdef CONFIG_SMP
> +static void imsic_ipi_send(unsigned int cpu)
> +{
> +	struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu);
> +
> +	writel_relaxed(IMSIC_IPI_ID, local->msi_va);
> +}
> +
> +static void imsic_ipi_starting_cpu(void)
> +{
> +	/* Enable IPIs for current CPU. */
> +	__imsic_id_set_enable(IMSIC_IPI_ID);
> +}
> +
> +static void imsic_ipi_dying_cpu(void)
> +{
> +	/* Disable IPIs for current CPU. */
> +	__imsic_id_clear_enable(IMSIC_IPI_ID);
> +}
> +
> +static int __init imsic_ipi_domain_init(void)
> +{
> +	int virq;
> +
> +	/* Create IMSIC IPI multiplexing */
> +	virq = ipi_mux_create(IMSIC_NR_IPI, imsic_ipi_send);
> +	if (virq <= 0)
> +		return (virq < 0) ? virq : -ENOMEM;

Nit: No parenthesis need to clutter.

> +
> +	/* Set vIRQ range */
> +	riscv_ipi_set_virq_range(virq, IMSIC_NR_IPI, true);
> +
> +	/* Announce that IMSIC is providing IPIs */
> +	pr_info("%pfwP: providing IPIs using interrupt %d\n", imsic->fwnode, IMSIC_IPI_ID);
> +
> +	return 0;
> +}
> +#else
> +static void imsic_ipi_starting_cpu(void)
> +{
> +}
> +
> +static void imsic_ipi_dying_cpu(void)
> +{
> +}
> +
> +static int __init imsic_ipi_domain_init(void)
> +{
> +	return 0;
> +}
> +#endif
> +
> +/*
> + * To handle an interrupt, we read the TOPEI CSR and write zero in one
> + * instruction. If TOPEI CSR is non-zero then we translate TOPEI.ID to
> + * Linux interrupt number and let Linux IRQ subsystem handle it.
> + */
> +static void imsic_handle_irq(struct irq_desc *desc)
> +{
> +	struct irq_chip *chip = irq_desc_get_chip(desc);
> +	int err, cpu = smp_processor_id();
> +	struct imsic_vector *vec;
> +	unsigned long local_id;
> +
> +	chained_irq_enter(chip, desc);
> +
> +	while ((local_id = csr_swap(CSR_TOPEI, 0))) {
> +		local_id = local_id >> TOPEI_ID_SHIFT;

Nit: Wdyt about moving shift into the loop predicate, or using >>=?

> +
> +		if (local_id == IMSIC_IPI_ID) {
> +#ifdef CONFIG_SMP
> +			ipi_mux_process();
> +#endif

Is IMSIC_IPI_ID a thing on !IS_ENABLED(CONFIG_SMP)?

> +			continue;
> +		}
> +
> +		if (unlikely(!imsic->base_domain))
> +			continue;
> +
> +		vec = imsic_vector_from_local_id(cpu, local_id);
> +		if (!vec) {
> +			pr_warn_ratelimited("vector not found for local ID 0x%lx\n", local_id);
> +			continue;
> +		}
> +
> +		err = generic_handle_domain_irq(imsic->base_domain,
> +						vec->hwirq);

Nit: 100 chars

> +		if (unlikely(err))
> +			pr_warn_ratelimited("hwirq 0x%x mapping not found\n", vec->hwirq);
> +	}
> +
> +	chained_irq_exit(chip, desc);
> +}
> +
> +static int imsic_starting_cpu(unsigned int cpu)
> +{
> +	/* Mark per-CPU IMSIC state as online */
> +	imsic_state_online();
> +
> +	/* Enable per-CPU parent interrupt */
> +	enable_percpu_irq(imsic_parent_irq, irq_get_trigger_type(imsic_parent_irq));
> +
> +	/* Setup IPIs */
> +	imsic_ipi_starting_cpu();
> +
> +	/*
> +	 * Interrupts identities might have been enabled/disabled while
> +	 * this CPU was not running so sync-up local enable/disable state.
> +	 */
> +	imsic_local_sync_all();
> +
> +	/* Enable local interrupt delivery */
> +	imsic_local_delivery(true);
> +
> +	return 0;
> +}
> +
> +static int imsic_dying_cpu(unsigned int cpu)
> +{
> +	/* Cleanup IPIs */
> +	imsic_ipi_dying_cpu();
> +
> +	/* Mark per-CPU IMSIC state as offline */
> +	imsic_state_offline();
> +
> +	return 0;
> +}
> +
> +static int __init imsic_early_probe(struct fwnode_handle *fwnode)
> +{
> +	struct irq_domain *domain;
> +	int rc;
> +
> +	/* Find parent domain and register chained handler */
> +	domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY);
> +	if (!domain) {
> +		pr_err("%pfwP: Failed to find INTC domain\n", fwnode);
> +		return -ENOENT;
> +	}
> +	imsic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT);
> +	if (!imsic_parent_irq) {
> +		pr_err("%pfwP: Failed to create INTC mapping\n", fwnode);
> +		return -ENOENT;
> +	}
> +
> +	/* Initialize IPI domain */
> +	rc = imsic_ipi_domain_init();
> +	if (rc) {
> +		pr_err("%pfwP: Failed to initialize IPI domain\n", fwnode);
> +		return rc;
> +	}
> +
> +	/* Setup chained handler to the parent domain interrupt */
> +	irq_set_chained_handler(imsic_parent_irq, imsic_handle_irq);
> +
> +	/*
> +	 * Setup cpuhp state (must be done after setting imsic_parent_irq)
> +	 *
> +	 * Don't disable per-CPU IMSIC file when CPU goes offline
> +	 * because this affects IPI and the masking/unmasking of
> +	 * virtual IPIs is done via generic IPI-Mux
> +	 */
> +	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "irqchip/riscv/imsic:starting",
> +			  imsic_starting_cpu, imsic_dying_cpu);
> +
> +	return 0;
> +}
> +
> +static int __init imsic_early_dt_init(struct device_node *node,
> +				      struct device_node *parent)
> +{
> +	struct fwnode_handle *fwnode = &node->fwnode;
> +	int rc;
> +
> +	/* Setup IMSIC state */
> +	rc = imsic_setup_state(fwnode);
> +	if (rc) {
> +		pr_err("%pfwP: failed to setup state (error %d)\n",
> +			fwnode, rc);

Nit. 100 chars

> +		return rc;
> +	}
> +
> +	/* Do early setup of IPIs */
> +	rc = imsic_early_probe(fwnode);
> +	if (rc)
> +		return rc;
> +
> +	/* Ensure that OF platform device gets probed */
> +	of_node_clear_flag(node, OF_POPULATED);
> +	return 0;
> +}
> +IRQCHIP_DECLARE(riscv_imsic, "riscv,imsics", imsic_early_dt_init);
> diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
> new file mode 100644
> index 000000000000..4f347486ec7c
> --- /dev/null
> +++ b/drivers/irqchip/irq-riscv-imsic-state.c
> @@ -0,0 +1,906 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> + * Copyright (C) 2022 Ventana Micro Systems Inc.
> + */
> +
> +#define pr_fmt(fmt) "riscv-imsic: " fmt
> +#include <linux/cpu.h>
> +#include <linux/bitmap.h>
> +#include <linux/interrupt.h>
> +#include <linux/irq.h>
> +#include <linux/module.h>
> +#include <linux/of.h>
> +#include <linux/of_address.h>
> +#include <linux/of_irq.h>
> +#include <linux/seq_file.h>
> +#include <linux/spinlock.h>
> +#include <linux/smp.h>
> +#include <asm/hwcap.h>
> +
> +#include "irq-riscv-imsic-state.h"
> +
> +#define IMSIC_DISABLE_EIDELIVERY		0
> +#define IMSIC_ENABLE_EIDELIVERY			1
> +#define IMSIC_DISABLE_EITHRESHOLD		1
> +#define IMSIC_ENABLE_EITHRESHOLD		0
> +
> +static inline void imsic_csr_write(unsigned long reg, unsigned long val)
> +{
> +	csr_write(CSR_ISELECT, reg);
> +	csr_write(CSR_IREG, val);
> +}
> +
> +static inline unsigned long imsic_csr_read(unsigned long reg)
> +{
> +	csr_write(CSR_ISELECT, reg);
> +	return csr_read(CSR_IREG);
> +}
> +
> +static inline unsigned long imsic_csr_read_clear(unsigned long reg, unsigned long val)
> +{
> +	csr_write(CSR_ISELECT, reg);
> +	return csr_read_clear(CSR_IREG, val);
> +}
> +
> +static inline void imsic_csr_set(unsigned long reg, unsigned long val)
> +{
> +	csr_write(CSR_ISELECT, reg);
> +	csr_set(CSR_IREG, val);
> +}
> +
> +static inline void imsic_csr_clear(unsigned long reg, unsigned long val)
> +{
> +	csr_write(CSR_ISELECT, reg);
> +	csr_clear(CSR_IREG, val);
> +}
> +
> +struct imsic_priv *imsic;
> +
> +const struct imsic_global_config *imsic_get_global_config(void)
> +{
> +	return imsic ? &imsic->global : NULL;
> +}
> +EXPORT_SYMBOL_GPL(imsic_get_global_config);
> +
> +static bool __imsic_eix_read_clear(unsigned long id, bool pend)
> +{
> +	unsigned long isel, imask;
> +
> +	isel = id / BITS_PER_LONG;
> +	isel *= BITS_PER_LONG / IMSIC_EIPx_BITS;
> +	isel += pend ? IMSIC_EIP0 : IMSIC_EIE0;
> +	imask = BIT(id & (__riscv_xlen - 1));
> +
> +	return (imsic_csr_read_clear(isel, imask) & imask) ? true : false;

Nit: use return !!(imsic_csr_read_clear(isel, imask) & imask)

> +}
> +
> +static inline bool __imsic_id_read_clear_enabled(unsigned long id)
> +{
> +	return __imsic_eix_read_clear(id, false);
> +}
> +
> +static inline bool __imsic_id_read_clear_pending(unsigned long id)
> +{
> +	return __imsic_eix_read_clear(id, true);
> +}
> +
> +void __imsic_eix_update(unsigned long base_id, unsigned long num_id, bool pend, bool val)
> +{
> +	unsigned long id = base_id, last_id = base_id + num_id;
> +	unsigned long i, isel, ireg;
> +
> +	while (id < last_id) {
> +		isel = id / BITS_PER_LONG;
> +		isel *= BITS_PER_LONG / IMSIC_EIPx_BITS;
> +		isel += (pend) ? IMSIC_EIP0 : IMSIC_EIE0;

Nit: Redundant parenthesis.

> +
> +		/*
> +		 * Prepare the ID mask to be programmed in the
> +		 * IMSIC EIEx and EIPx registers. These registers
> +		 * are XLEN-wide and we must not touch IDs which
> +		 * are < base_id and >= (base_id + num_id).
> +		 */
> +		ireg = 0;
> +		for (i = id & (__riscv_xlen - 1); (id < last_id) && (i < __riscv_xlen); i++) {

Nit: Redundant parenthesis "(id < last_id) && (i < __riscv_xlen)", which
is also inconsistent with other usage in this changeset.

> +			ireg |= BIT(i);
> +			id++;
> +		}
> +
> +		/*
> +		 * The IMSIC EIEx and EIPx registers are indirectly
> +		 * accessed via using ISELECT and IREG CSRs so we
> +		 * need to access these CSRs without getting preempted.
> +		 *
> +		 * All existing users of this function call this
> +		 * function with local IRQs disabled so we don't
> +		 * need to do anything special here.
> +		 */
> +		if (val)
> +			imsic_csr_set(isel, ireg);
> +		else
> +			imsic_csr_clear(isel, ireg);
> +	}
> +}
> +
> +/* MUST be called with lpriv->lock held */
> +static void __imsic_local_sync(struct imsic_local_priv *lpriv)
> +{
> +	struct imsic_local_config *mlocal;
> +	struct imsic_vector *vec, *mvec;
> +	int i;
> +
> +	/* This pairs with the barrier in __imsic_remote_sync(). */
> +	smp_mb();
> +
> +	for_each_set_bit(i, lpriv->dirty_bitmap, imsic->global.nr_ids + 1) {
> +		if (!i || i == IMSIC_IPI_ID)
> +			goto skip;
> +		vec = &lpriv->vectors[i];
> +
> +		if (vec->enable)
> +			__imsic_id_set_enable(i);
> +		else
> +			__imsic_id_clear_enable(i);
> +
> +		/*
> +		 * If the ID was being moved to a new ID on some other CPU
> +		 * then we can get a MSI during the movement so check the
> +		 * ID pending bit and re-trigger the new ID on other CPU
> +		 * using MMIO write.
> +		 */
> +		mvec = vec->move;
> +		vec->move = NULL;
> +		if (mvec && mvec != vec) {
> +			if (__imsic_id_read_clear_pending(i)) {
> +				mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu);
> +				writel_relaxed(mvec->local_id, mlocal->msi_va);
> +			}
> +
> +			imsic_vector_free(&lpriv->vectors[i]);
> +		}
> +
> +skip:
> +		bitmap_clear(lpriv->dirty_bitmap, i, 1);
> +	}
> +}
> +
> +void imsic_local_sync_all(void)
> +{
> +	struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +	bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1);
> +	__imsic_local_sync(lpriv);
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> +}
> +
> +void imsic_local_delivery(bool enable)
> +{
> +	if (enable) {
> +		imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_ENABLE_EITHRESHOLD);
> +		imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_ENABLE_EIDELIVERY);
> +		return;
> +	}
> +
> +	imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_DISABLE_EIDELIVERY);
> +	imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_DISABLE_EITHRESHOLD);
> +}
> +
> +#ifdef CONFIG_SMP
> +static void imsic_local_timer_callback(struct timer_list *timer)
> +{
> +	struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +	__imsic_local_sync(lpriv);
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> +}
> +
> +/* MUST be called with lpriv->lock held */
> +static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)
> +{
> +	/*
> +	 * Ensure that changes to vector enable, vector move and
> +	 * dirty bitmap are visible to the target CPU.
> +	 *
> +	 * This pairs with the barrier in __imsic_local_sync().
> +	 */
> +	smp_mb();
> +
> +	/*
> +	 * We schedule a timer on the target CPU if the target CPU is not
> +	 * same as the current CPU. An offline CPU will unconditionally
> +	 * synchronize IDs through imsic_starting_cpu() when the
> +	 * CPU is brought up.
> +	 */
> +	if (cpu_online(cpu)) {
> +		if (cpu != smp_processor_id()) {
> +			if (!timer_pending(&lpriv->timer)) {
> +				lpriv->timer.expires = jiffies + 1;
> +				add_timer_on(&lpriv->timer, cpu);
> +			}
> +		} else {
> +			__imsic_local_sync(lpriv);
> +		}

Nit: Early exit/return vs else-clause for readability


> +	}
> +}
> +#else
> +/* MUST be called with lpriv->lock held */
> +static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)
> +{
> +	__imsic_local_sync(lpriv);
> +}
> +#endif
> +
> +void imsic_vector_mask(struct imsic_vector *vec)
> +{
> +	struct imsic_local_priv *lpriv;
> +
> +	lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> +	if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> +		return;
> +
> +	/*
> +	 * This function is called through Linux irq subsystem with
> +	 * irqs disabled so no need to save/restore irq flags.
> +	 */
> +
> +	raw_spin_lock(&lpriv->lock);
> +
> +	vec->enable = false;
> +	bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
> +	__imsic_remote_sync(lpriv, vec->cpu);
> +
> +	raw_spin_unlock(&lpriv->lock);
> +}

Really nice that you're using a timer for the vector affinity change,
and got rid of the special/weird IMSIC/sync IPI. Can you really use a
timer for mask/unmask? That makes the mask/unmask operation
asynchronous!

That was what I was trying to get though with this comment:
https://lore.kernel.org/linux-riscv/87sf24mo1g.fsf@all.your.base.are.belongto.us/

Also, using the smp_* IPI functions, you can pass arguments, so you
don't need the dirty_bitmap tracking the changes.

> +
> +void imsic_vector_unmask(struct imsic_vector *vec)
> +{
> +	struct imsic_local_priv *lpriv;
> +
> +	lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> +	if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> +		return;
> +
> +	/*
> +	 * This function is called through Linux irq subsystem with
> +	 * irqs disabled so no need to save/restore irq flags.
> +	 */
> +
> +	raw_spin_lock(&lpriv->lock);
> +
> +	vec->enable = true;
> +	bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
> +	__imsic_remote_sync(lpriv, vec->cpu);
> +
> +	raw_spin_unlock(&lpriv->lock);
> +}
> +
> +
> +bool imsic_vector_isenabled(struct imsic_vector *vec)
> +{
> +	struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> +	unsigned long flags;
> +	bool ret;
> +
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +	ret = vec->enable;
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> +
> +	return ret;
> +}
> +
> +struct imsic_vector *imsic_vector_get_move(struct imsic_vector *vec)
> +{
> +	struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> +	struct imsic_vector *ret;
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +	ret = vec->move;
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> +
> +	return ret;
> +}
> +
> +static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, struct imsic_vector *vec,
> +				     bool new_enable, struct imsic_vector *new_move)
> +{
> +	unsigned long flags;
> +	bool enabled;
> +
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +
> +	/* Update enable and move details */
> +	enabled = vec->enable;
> +	vec->enable = new_enable;
> +	vec->move = new_move;
> +
> +	/* Mark the vector as dirty and synchronize */
> +	bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
> +	__imsic_remote_sync(lpriv, vec->cpu);
> +
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> +
> +	return enabled;
> +}
> +
> +void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_vec)
> +{
> +	struct imsic_local_priv *old_lpriv, *new_lpriv;
> +	bool enabled;
> +
> +	if (WARN_ON(old_vec->cpu == new_vec->cpu))
> +		return;
> +
> +	old_lpriv = per_cpu_ptr(imsic->lpriv, old_vec->cpu);
> +	if (WARN_ON(&old_lpriv->vectors[old_vec->local_id] != old_vec))
> +		return;
> +
> +	new_lpriv = per_cpu_ptr(imsic->lpriv, new_vec->cpu);
> +	if (WARN_ON(&new_lpriv->vectors[new_vec->local_id] != new_vec))
> +		return;
> +
> +	/*
> +	 * Move and re-trigger the new vector based on the pending
> +	 * state of the old vector because we might get a device
> +	 * interrupt on the old vector while device was being moved
> +	 * to the new vector.
> +	 */
> +	enabled = imsic_vector_move_update(old_lpriv, old_vec, false, new_vec);
> +	imsic_vector_move_update(new_lpriv, new_vec, enabled, new_vec);
> +}
> +
> +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
> +void imsic_vector_debug_show(struct seq_file *m, struct imsic_vector *vec, int ind)
> +{
> +	struct imsic_local_priv *lpriv;
> +	struct imsic_vector *mvec;
> +	bool is_enabled;
> +
> +	lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> +	if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> +		return;
> +
> +	is_enabled = imsic_vector_isenabled(vec);
> +	mvec = imsic_vector_get_move(vec);
> +
> +	seq_printf(m, "%*starget_cpu      : %5u\n", ind, "", vec->cpu);
> +	seq_printf(m, "%*starget_local_id : %5u\n", ind, "", vec->local_id);
> +	seq_printf(m, "%*sis_reserved     : %5u\n", ind, "",
> +		   (vec->local_id <= IMSIC_IPI_ID) ? 1 : 0);
> +	seq_printf(m, "%*sis_enabled      : %5u\n", ind, "", (is_enabled) ? 1 : 0);
> +	seq_printf(m, "%*sis_move_pending : %5u\n", ind, "", (mvec) ? 1 : 0);

Nit: Redundant parenthesis.

> +	if (mvec) {
> +		seq_printf(m, "%*smove_cpu        : %5u\n", ind, "", mvec->cpu);
> +		seq_printf(m, "%*smove_local_id   : %5u\n", ind, "", mvec->local_id);
> +	}
> +}
> +
> +void imsic_vector_debug_show_summary(struct seq_file *m, int ind)
> +{
> +	irq_matrix_debug_show(m, imsic->matrix, ind);
> +}
> +#endif
> +
> +struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int local_id)
> +{
> +	struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> +
> +	if (!lpriv || imsic->global.nr_ids < local_id)
> +		return NULL;
> +
> +	return &lpriv->vectors[local_id];
> +}
> +
> +struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask *mask)
> +{
> +	struct imsic_vector *vec = NULL;
> +	struct imsic_local_priv *lpriv;
> +	unsigned long flags;
> +	unsigned int cpu;
> +	int local_id;
> +
> +	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> +	local_id = irq_matrix_alloc(imsic->matrix, mask, false, &cpu);
> +	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> +	if (local_id < 0)
> +		return NULL;
> +
> +	lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> +	vec = &lpriv->vectors[local_id];
> +	vec->hwirq = hwirq;
> +	vec->enable = false;
> +	vec->move = NULL;
> +
> +	return vec;
> +}
> +
> +void imsic_vector_free(struct imsic_vector *vec)
> +{
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> +	vec->hwirq = UINT_MAX;
> +	irq_matrix_free(imsic->matrix, vec->cpu, vec->local_id, false);
> +	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> +}
> +
> +static void __init imsic_local_cleanup(void)
> +{
> +	int cpu;
> +	struct imsic_local_priv *lpriv;
> +
> +	for_each_possible_cpu(cpu) {
> +		lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> +
> +		bitmap_free(lpriv->dirty_bitmap);
> +		kfree(lpriv->vectors);
> +	}
> +
> +	free_percpu(imsic->lpriv);
> +}
> +
> +static int __init imsic_local_init(void)
> +{
> +	struct imsic_global_config *global = &imsic->global;
> +	struct imsic_local_priv *lpriv;
> +	struct imsic_vector *vec;
> +	int cpu, i;
> +
> +	/* Allocate per-CPU private state */
> +	imsic->lpriv = alloc_percpu(typeof(*(imsic->lpriv)));
> +	if (!imsic->lpriv)
> +		return -ENOMEM;
> +
> +	/* Setup per-CPU private state */
> +	for_each_possible_cpu(cpu) {
> +		lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> +
> +		raw_spin_lock_init(&lpriv->lock);
> +
> +		/* Allocate dirty bitmap */
> +		lpriv->dirty_bitmap = bitmap_zalloc(global->nr_ids + 1, GFP_KERNEL);
> +		if (!lpriv->dirty_bitmap)
> +			goto fail_local_cleanup;
> +
> +#ifdef CONFIG_SMP
> +		/* Setup lazy timer for synchronization */
> +		timer_setup(&lpriv->timer, imsic_local_timer_callback, TIMER_PINNED);
> +#endif
> +
> +		/* Allocate vector array */
> +		lpriv->vectors = kcalloc(global->nr_ids + 1, sizeof(*lpriv->vectors),
> +					 GFP_KERNEL);
> +		if (!lpriv->vectors)
> +			goto fail_local_cleanup;
> +
> +		/* Setup vector array */
> +		for (i = 0; i <= global->nr_ids; i++) {
> +			vec = &lpriv->vectors[i];
> +			vec->cpu = cpu;
> +			vec->local_id = i;
> +			vec->hwirq = UINT_MAX;
> +		}
> +	}
> +
> +	return 0;
> +
> +fail_local_cleanup:
> +	imsic_local_cleanup();
> +	return -ENOMEM;
> +}
> +
> +void imsic_state_online(void)
> +{
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> +	irq_matrix_online(imsic->matrix);
> +	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> +}
> +
> +void imsic_state_offline(void)
> +{
> +#ifdef CONFIG_SMP
> +	struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> +#endif
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> +	irq_matrix_offline(imsic->matrix);
> +	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> +
> +#ifdef CONFIG_SMP
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +	WARN_ON_ONCE(try_to_del_timer_sync(&lpriv->timer) < 0);
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> +#endif
> +}
> +
> +static int __init imsic_matrix_init(void)
> +{
> +	struct imsic_global_config *global = &imsic->global;
> +
> +	raw_spin_lock_init(&imsic->matrix_lock);
> +	imsic->matrix = irq_alloc_matrix(global->nr_ids + 1,
> +					 0, global->nr_ids + 1);
> +	if (!imsic->matrix)
> +		return -ENOMEM;
> +
> +	/* Reserve ID#0 because it is special and never implemented */
> +	irq_matrix_assign_system(imsic->matrix, 0, false);
> +
> +	/* Reserve IPI ID because it is special and used internally */
> +	irq_matrix_assign_system(imsic->matrix, IMSIC_IPI_ID, false);
> +
> +	return 0;
> +}
> +
> +static int __init imsic_get_parent_hartid(struct fwnode_handle *fwnode,
> +					  u32 index, unsigned long *hartid)
> +{
> +	struct of_phandle_args parent;
> +	int rc;
> +
> +	/*
> +	 * Currently, only OF fwnode is supported so extend this
> +	 * function for ACPI support.
> +	 */
> +	if (!is_of_node(fwnode))
> +		return -EINVAL;
> +
> +	rc = of_irq_parse_one(to_of_node(fwnode), index, &parent);
> +	if (rc)
> +		return rc;
> +
> +	/*
> +	 * Skip interrupts other than external interrupts for
> +	 * current privilege level.
> +	 */
> +	if (parent.args[0] != RV_IRQ_EXT)
> +		return -EINVAL;
> +
> +	return riscv_of_parent_hartid(parent.np, hartid);
> +}
> +
> +static int __init imsic_get_mmio_resource(struct fwnode_handle *fwnode,
> +					  u32 index, struct resource *res)
> +{
> +	/*
> +	 * Currently, only OF fwnode is supported so extend this
> +	 * function for ACPI support.
> +	 */
> +	if (!is_of_node(fwnode))
> +		return -EINVAL;
> +
> +	return of_address_to_resource(to_of_node(fwnode), index, res);
> +}
> +
> +static int __init imsic_parse_fwnode(struct fwnode_handle *fwnode,
> +				     struct imsic_global_config *global,
> +				     u32 *nr_parent_irqs,
> +				     u32 *nr_mmios)
> +{
> +	unsigned long hartid;
> +	struct resource res;
> +	int rc;
> +	u32 i;
> +
> +	/*
> +	 * Currently, only OF fwnode is supported so extend this
> +	 * function for ACPI support.
> +	 */
> +	if (!is_of_node(fwnode))
> +		return -EINVAL;
> +
> +	*nr_parent_irqs = 0;
> +	*nr_mmios = 0;
> +
> +	/* Find number of parent interrupts */
> +	while (!imsic_get_parent_hartid(fwnode, *nr_parent_irqs, &hartid))
> +		(*nr_parent_irqs)++;
> +	if (!(*nr_parent_irqs)) {

Nit: Redundant parenthesis

> +		pr_err("%pfwP: no parent irqs available\n", fwnode);
> +		return -EINVAL;
> +	}
> +
> +	/* Find number of guest index bits in MSI address */
> +	rc = of_property_read_u32(to_of_node(fwnode), "riscv,guest-index-bits",
> +				  &global->guest_index_bits);
> +	if (rc)
> +		global->guest_index_bits = 0;
> +
> +	/* Find number of HART index bits */
> +	rc = of_property_read_u32(to_of_node(fwnode), "riscv,hart-index-bits",
> +				  &global->hart_index_bits);
> +	if (rc) {
> +		/* Assume default value */
> +		global->hart_index_bits = __fls(*nr_parent_irqs);
> +		if (BIT(global->hart_index_bits) < *nr_parent_irqs)
> +			global->hart_index_bits++;
> +	}
> +
> +	/* Find number of group index bits */
> +	rc = of_property_read_u32(to_of_node(fwnode), "riscv,group-index-bits",
> +				  &global->group_index_bits);
> +	if (rc)
> +		global->group_index_bits = 0;
> +
> +	/*
> +	 * Find first bit position of group index.
> +	 * If not specified assumed the default APLIC-IMSIC configuration.
> +	 */
> +	rc = of_property_read_u32(to_of_node(fwnode), "riscv,group-index-shift",
> +				  &global->group_index_shift);
> +	if (rc)
> +		global->group_index_shift = IMSIC_MMIO_PAGE_SHIFT * 2;
> +
> +	/* Find number of interrupt identities */
> +	rc = of_property_read_u32(to_of_node(fwnode), "riscv,num-ids",
> +				  &global->nr_ids);
> +	if (rc) {
> +		pr_err("%pfwP: number of interrupt identities not found\n",
> +			fwnode);
> +		return rc;
> +	}
> +
> +	/* Find number of guest interrupt identities */
> +	rc = of_property_read_u32(to_of_node(fwnode), "riscv,num-guest-ids",
> +				  &global->nr_guest_ids);
> +	if (rc)
> +		global->nr_guest_ids = global->nr_ids;
> +
> +	/* Sanity check guest index bits */
> +	i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT;
> +	if (i < global->guest_index_bits) {
> +		pr_err("%pfwP: guest index bits too big\n", fwnode);
> +		return -EINVAL;
> +	}
> +
> +	/* Sanity check HART index bits */
> +	i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT - global->guest_index_bits;
> +	if (i < global->hart_index_bits) {
> +		pr_err("%pfwP: HART index bits too big\n", fwnode);
> +		return -EINVAL;
> +	}
> +
> +	/* Sanity check group index bits */
> +	i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT -
> +	    global->guest_index_bits - global->hart_index_bits;
> +	if (i < global->group_index_bits) {
> +		pr_err("%pfwP: group index bits too big\n", fwnode);
> +		return -EINVAL;
> +	}
> +
> +	/* Sanity check group index shift */
> +	i = global->group_index_bits + global->group_index_shift - 1;
> +	if (i >= BITS_PER_LONG) {
> +		pr_err("%pfwP: group index shift too big\n", fwnode);
> +		return -EINVAL;
> +	}
> +
> +	/* Sanity check number of interrupt identities */
> +	if ((global->nr_ids < IMSIC_MIN_ID) ||
> +	    (global->nr_ids >= IMSIC_MAX_ID) ||
> +	    ((global->nr_ids & IMSIC_MIN_ID) != IMSIC_MIN_ID)) {
> +		pr_err("%pfwP: invalid number of interrupt identities\n",
> +			fwnode);

Nit: 100 chars

> +		return -EINVAL;
> +	}
> +
> +	/* Sanity check number of guest interrupt identities */
> +	if ((global->nr_guest_ids < IMSIC_MIN_ID) ||
> +	    (global->nr_guest_ids >= IMSIC_MAX_ID) ||
> +	    ((global->nr_guest_ids & IMSIC_MIN_ID) != IMSIC_MIN_ID)) {
> +		pr_err("%pfwP: invalid number of guest interrupt identities\n",
> +			fwnode);

Nit: 100 chars

> +		return -EINVAL;
> +	}
> +
> +	/* Compute base address */
> +	rc = imsic_get_mmio_resource(fwnode, 0, &res);
> +	if (rc) {
> +		pr_err("%pfwP: first MMIO resource not found\n", fwnode);
> +		return -EINVAL;
> +	}
> +	global->base_addr = res.start;
> +	global->base_addr &= ~(BIT(global->guest_index_bits +
> +				   global->hart_index_bits +
> +				   IMSIC_MMIO_PAGE_SHIFT) - 1);
> +	global->base_addr &= ~((BIT(global->group_index_bits) - 1) <<
> +			       global->group_index_shift);
> +
> +	/* Find number of MMIO register sets */
> +	while (!imsic_get_mmio_resource(fwnode, *nr_mmios, &res))
> +		(*nr_mmios)++;
> +
> +	return 0;
> +}
> +
> +int __init imsic_setup_state(struct fwnode_handle *fwnode)
> +{
> +	u32 i, j, index, nr_parent_irqs, nr_mmios, nr_handlers = 0;
> +	struct imsic_global_config *global;
> +	struct imsic_local_config *local;
> +	void __iomem **mmios_va = NULL;
> +	struct resource *mmios = NULL;
> +	unsigned long reloff, hartid;
> +	phys_addr_t base_addr;
> +	int rc, cpu;
> +
> +	/*
> +	 * Only one IMSIC instance allowed in a platform for clean
> +	 * implementation of SMP IRQ affinity and per-CPU IPIs.
> +	 *
> +	 * This means on a multi-socket (or multi-die) platform we
> +	 * will have multiple MMIO regions for one IMSIC instance.
> +	 */
> +	if (imsic) {
> +		pr_err("%pfwP: already initialized hence ignoring\n",
> +			fwnode);

Nit: 100 chars

> +		return -EALREADY;
> +	}
> +
> +	if (!riscv_isa_extension_available(NULL, SxAIA)) {
> +		pr_err("%pfwP: AIA support not available\n", fwnode);
> +		return -ENODEV;
> +	}
> +
> +	imsic = kzalloc(sizeof(*imsic), GFP_KERNEL);
> +	if (!imsic)
> +		return -ENOMEM;
> +	imsic->fwnode = fwnode;
> +	global = &imsic->global;
> +
> +	global->local = alloc_percpu(typeof(*(global->local)));
> +	if (!global->local) {
> +		rc = -ENOMEM;
> +		goto out_free_priv;
> +	}
> +
> +	/* Parse IMSIC fwnode */
> +	rc = imsic_parse_fwnode(fwnode, global, &nr_parent_irqs, &nr_mmios);
> +	if (rc)
> +		goto out_free_local;
> +
> +	/* Allocate MMIO resource array */
> +	mmios = kcalloc(nr_mmios, sizeof(*mmios), GFP_KERNEL);
> +	if (!mmios) {
> +		rc = -ENOMEM;
> +		goto out_free_local;
> +	}
> +
> +	/* Allocate MMIO virtual address array */
> +	mmios_va = kcalloc(nr_mmios, sizeof(*mmios_va), GFP_KERNEL);
> +	if (!mmios_va) {
> +		rc = -ENOMEM;
> +		goto out_iounmap;
> +	}
> +
> +	/* Parse and map MMIO register sets */
> +	for (i = 0; i < nr_mmios; i++) {
> +		rc = imsic_get_mmio_resource(fwnode, i, &mmios[i]);
> +		if (rc) {
> +			pr_err("%pfwP: unable to parse MMIO regset %d\n",
> +				fwnode, i);

Nit: 100 chars

> +			goto out_iounmap;
> +		}
> +
> +		base_addr = mmios[i].start;
> +		base_addr &= ~(BIT(global->guest_index_bits +
> +				   global->hart_index_bits +
> +				   IMSIC_MMIO_PAGE_SHIFT) - 1);
> +		base_addr &= ~((BIT(global->group_index_bits) - 1) <<
> +			       global->group_index_shift);
> +		if (base_addr != global->base_addr) {
> +			rc = -EINVAL;
> +			pr_err("%pfwP: address mismatch for regset %d\n",
> +				fwnode, i);

Nit: 100 chars... and all the places below where applicable.


Björn
  
Björn Töpel Feb. 20, 2024, 11:53 a.m. UTC | #2
Anup Patel <apatel@ventanamicro.com> writes:

> The RISC-V advanced interrupt architecture (AIA) specification
> defines a new MSI controller called incoming message signalled
> interrupt controller (IMSIC) which manages MSI on per-HART (or
> per-CPU) basis. It also supports IPIs as software injected MSIs.
> (For more details refer https://github.com/riscv/riscv-aia)
>
> Let us add an early irqchip driver for RISC-V IMSIC which sets
> up the IMSIC state and provide IPIs.
>
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>

This patch has a couple of checkpatch issues:

CHECK: Alignment should match open parenthesis
CHECK: Please don't use multiple blank lines
CHECK: Please use a blank line after function/struct/union/enum declarations
CHECK: Unnecessary parentheses around 'global->nr_guest_ids < IMSIC_MIN_ID'
CHECK: Unnecessary parentheses around 'global->nr_guest_ids >= IMSIC_MAX_ID'
CHECK: Unnecessary parentheses around 'global->nr_ids < IMSIC_MIN_ID'
CHECK: Unnecessary parentheses around 'global->nr_ids >= IMSIC_MAX_ID'
CHECK: Unnecessary parentheses around global->local
CHECK: Unnecessary parentheses around imsic->lpriv
CHECK: extern prototypes should be avoided in .h files

Björn
  
Anup Patel Feb. 20, 2024, 1 p.m. UTC | #3
On Tue, Feb 20, 2024 at 5:22 PM Björn Töpel <bjorn@kernelorg> wrote:
>
> Anup,
>
> This version is so much easier to follow! Thanks a lot for then
> cleanups/design changes.
>
> A bunch of nits, and a major one, below.

Sure, I will address the nits in the next revision.

>
> Anup Patel <apatel@ventanamicro.com> writes:
>
> > The RISC-V advanced interrupt architecture (AIA) specification
> > defines a new MSI controller called incoming message signalled
> > interrupt controller (IMSIC) which manages MSI on per-HART (or
> > per-CPU) basis. It also supports IPIs as software injected MSIs.
> > (For more details refer https://github.com/riscv/riscv-aia)
> >
> > Let us add an early irqchip driver for RISC-V IMSIC which sets
> > up the IMSIC state and provide IPIs.
> >
> > Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> > ---
> >  drivers/irqchip/Kconfig                 |   7 +
> >  drivers/irqchip/Makefile                |   1 +
> >  drivers/irqchip/irq-riscv-imsic-early.c | 213 ++++++
> >  drivers/irqchip/irq-riscv-imsic-state.c | 906 ++++++++++++++++++++++++
> >  drivers/irqchip/irq-riscv-imsic-state.h |  98 +++
> >  include/linux/irqchip/riscv-imsic.h     |  87 +++
> >  6 files changed, 1312 insertions(+)
> >  create mode 100644 drivers/irqchip/irq-riscv-imsic-early.c
> >  create mode 100644 drivers/irqchip/irq-riscv-imsic-state.c
> >  create mode 100644 drivers/irqchip/irq-riscv-imsic-state.h
> >  create mode 100644 include/linux/irqchip/riscv-imsic.h
> >
> > diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
> > index f7149d0f3d45..85f86e31c996 100644
> > --- a/drivers/irqchip/Kconfig
> > +++ b/drivers/irqchip/Kconfig
> > @@ -546,6 +546,13 @@ config SIFIVE_PLIC
> >       select IRQ_DOMAIN_HIERARCHY
> >       select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
> >
> > +config RISCV_IMSIC
> > +     bool
> > +     depends on RISCV
> > +     select IRQ_DOMAIN_HIERARCHY
> > +     select GENERIC_IRQ_MATRIX_ALLOCATOR
> > +     select GENERIC_MSI_IRQ
> > +
> >  config EXYNOS_IRQ_COMBINER
> >       bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST
> >       depends on (ARCH_EXYNOS && ARM) || COMPILE_TEST
> > diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
> > index ffd945fe71aa..d714724387ce 100644
> > --- a/drivers/irqchip/Makefile
> > +++ b/drivers/irqchip/Makefile
> > @@ -95,6 +95,7 @@ obj-$(CONFIG_QCOM_MPM)                      += irq-qcom-mpm.o
> >  obj-$(CONFIG_CSKY_MPINTC)            += irq-csky-mpintc.o
> >  obj-$(CONFIG_CSKY_APB_INTC)          += irq-csky-apb-intc.o
> >  obj-$(CONFIG_RISCV_INTC)             += irq-riscv-intc.o
> > +obj-$(CONFIG_RISCV_IMSIC)            += irq-riscv-imsic-state.o irq-riscv-imsic-early.o
> >  obj-$(CONFIG_SIFIVE_PLIC)            += irq-sifive-plic.o
> >  obj-$(CONFIG_IMX_IRQSTEER)           += irq-imx-irqsteer.o
> >  obj-$(CONFIG_IMX_INTMUX)             += irq-imx-intmux.o
> > diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c
> > new file mode 100644
> > index 000000000000..32fe428b1c19
> > --- /dev/null
> > +++ b/drivers/irqchip/irq-riscv-imsic-early.c
> > @@ -0,0 +1,213 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> > + * Copyright (C) 2022 Ventana Micro Systems Inc.
> > + */
> > +
> > +#define pr_fmt(fmt) "riscv-imsic: " fmt
> > +#include <linux/cpu.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/io.h>
> > +#include <linux/irq.h>
> > +#include <linux/irqchip.h>
> > +#include <linux/irqchip/chained_irq.h>
> > +#include <linux/module.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/smp.h>
> > +
> > +#include "irq-riscv-imsic-state.h"
> > +
> > +static int imsic_parent_irq;
> > +
> > +#ifdef CONFIG_SMP
> > +static void imsic_ipi_send(unsigned int cpu)
> > +{
> > +     struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu);
> > +
> > +     writel_relaxed(IMSIC_IPI_ID, local->msi_va);
> > +}
> > +
> > +static void imsic_ipi_starting_cpu(void)
> > +{
> > +     /* Enable IPIs for current CPU. */
> > +     __imsic_id_set_enable(IMSIC_IPI_ID);
> > +}
> > +
> > +static void imsic_ipi_dying_cpu(void)
> > +{
> > +     /* Disable IPIs for current CPU. */
> > +     __imsic_id_clear_enable(IMSIC_IPI_ID);
> > +}
> > +
> > +static int __init imsic_ipi_domain_init(void)
> > +{
> > +     int virq;
> > +
> > +     /* Create IMSIC IPI multiplexing */
> > +     virq = ipi_mux_create(IMSIC_NR_IPI, imsic_ipi_send);
> > +     if (virq <= 0)
> > +             return (virq < 0) ? virq : -ENOMEM;
>
> Nit: No parenthesis need to clutter.
>
> > +
> > +     /* Set vIRQ range */
> > +     riscv_ipi_set_virq_range(virq, IMSIC_NR_IPI, true);
> > +
> > +     /* Announce that IMSIC is providing IPIs */
> > +     pr_info("%pfwP: providing IPIs using interrupt %d\n", imsic->fwnode, IMSIC_IPI_ID);
> > +
> > +     return 0;
> > +}
> > +#else
> > +static void imsic_ipi_starting_cpu(void)
> > +{
> > +}
> > +
> > +static void imsic_ipi_dying_cpu(void)
> > +{
> > +}
> > +
> > +static int __init imsic_ipi_domain_init(void)
> > +{
> > +     return 0;
> > +}
> > +#endif
> > +
> > +/*
> > + * To handle an interrupt, we read the TOPEI CSR and write zero in one
> > + * instruction. If TOPEI CSR is non-zero then we translate TOPEI.ID to
> > + * Linux interrupt number and let Linux IRQ subsystem handle it.
> > + */
> > +static void imsic_handle_irq(struct irq_desc *desc)
> > +{
> > +     struct irq_chip *chip = irq_desc_get_chip(desc);
> > +     int err, cpu = smp_processor_id();
> > +     struct imsic_vector *vec;
> > +     unsigned long local_id;
> > +
> > +     chained_irq_enter(chip, desc);
> > +
> > +     while ((local_id = csr_swap(CSR_TOPEI, 0))) {
> > +             local_id = local_id >> TOPEI_ID_SHIFT;
>
> Nit: Wdyt about moving shift into the loop predicate, or using >>=?
>
> > +
> > +             if (local_id == IMSIC_IPI_ID) {
> > +#ifdef CONFIG_SMP
> > +                     ipi_mux_process();
> > +#endif
>
> Is IMSIC_IPI_ID a thing on !IS_ENABLED(CONFIG_SMP)?
>
> > +                     continue;
> > +             }
> > +
> > +             if (unlikely(!imsic->base_domain))
> > +                     continue;
> > +
> > +             vec = imsic_vector_from_local_id(cpu, local_id);
> > +             if (!vec) {
> > +                     pr_warn_ratelimited("vector not found for local ID 0x%lx\n", local_id);
> > +                     continue;
> > +             }
> > +
> > +             err = generic_handle_domain_irq(imsic->base_domain,
> > +                                             vec->hwirq);
>
> Nit: 100 chars
>
> > +             if (unlikely(err))
> > +                     pr_warn_ratelimited("hwirq 0x%x mapping not found\n", vec->hwirq);
> > +     }
> > +
> > +     chained_irq_exit(chip, desc);
> > +}
> > +
> > +static int imsic_starting_cpu(unsigned int cpu)
> > +{
> > +     /* Mark per-CPU IMSIC state as online */
> > +     imsic_state_online();
> > +
> > +     /* Enable per-CPU parent interrupt */
> > +     enable_percpu_irq(imsic_parent_irq, irq_get_trigger_type(imsic_parent_irq));
> > +
> > +     /* Setup IPIs */
> > +     imsic_ipi_starting_cpu();
> > +
> > +     /*
> > +      * Interrupts identities might have been enabled/disabled while
> > +      * this CPU was not running so sync-up local enable/disable state.
> > +      */
> > +     imsic_local_sync_all();
> > +
> > +     /* Enable local interrupt delivery */
> > +     imsic_local_delivery(true);
> > +
> > +     return 0;
> > +}
> > +
> > +static int imsic_dying_cpu(unsigned int cpu)
> > +{
> > +     /* Cleanup IPIs */
> > +     imsic_ipi_dying_cpu();
> > +
> > +     /* Mark per-CPU IMSIC state as offline */
> > +     imsic_state_offline();
> > +
> > +     return 0;
> > +}
> > +
> > +static int __init imsic_early_probe(struct fwnode_handle *fwnode)
> > +{
> > +     struct irq_domain *domain;
> > +     int rc;
> > +
> > +     /* Find parent domain and register chained handler */
> > +     domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY);
> > +     if (!domain) {
> > +             pr_err("%pfwP: Failed to find INTC domain\n", fwnode);
> > +             return -ENOENT;
> > +     }
> > +     imsic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT);
> > +     if (!imsic_parent_irq) {
> > +             pr_err("%pfwP: Failed to create INTC mapping\n", fwnode);
> > +             return -ENOENT;
> > +     }
> > +
> > +     /* Initialize IPI domain */
> > +     rc = imsic_ipi_domain_init();
> > +     if (rc) {
> > +             pr_err("%pfwP: Failed to initialize IPI domain\n", fwnode);
> > +             return rc;
> > +     }
> > +
> > +     /* Setup chained handler to the parent domain interrupt */
> > +     irq_set_chained_handler(imsic_parent_irq, imsic_handle_irq);
> > +
> > +     /*
> > +      * Setup cpuhp state (must be done after setting imsic_parent_irq)
> > +      *
> > +      * Don't disable per-CPU IMSIC file when CPU goes offline
> > +      * because this affects IPI and the masking/unmasking of
> > +      * virtual IPIs is done via generic IPI-Mux
> > +      */
> > +     cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "irqchip/riscv/imsic:starting",
> > +                       imsic_starting_cpu, imsic_dying_cpu);
> > +
> > +     return 0;
> > +}
> > +
> > +static int __init imsic_early_dt_init(struct device_node *node,
> > +                                   struct device_node *parent)
> > +{
> > +     struct fwnode_handle *fwnode = &node->fwnode;
> > +     int rc;
> > +
> > +     /* Setup IMSIC state */
> > +     rc = imsic_setup_state(fwnode);
> > +     if (rc) {
> > +             pr_err("%pfwP: failed to setup state (error %d)\n",
> > +                     fwnode, rc);
>
> Nit. 100 chars
>
> > +             return rc;
> > +     }
> > +
> > +     /* Do early setup of IPIs */
> > +     rc = imsic_early_probe(fwnode);
> > +     if (rc)
> > +             return rc;
> > +
> > +     /* Ensure that OF platform device gets probed */
> > +     of_node_clear_flag(node, OF_POPULATED);
> > +     return 0;
> > +}
> > +IRQCHIP_DECLARE(riscv_imsic, "riscv,imsics", imsic_early_dt_init);
> > diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
> > new file mode 100644
> > index 000000000000..4f347486ec7c
> > --- /dev/null
> > +++ b/drivers/irqchip/irq-riscv-imsic-state.c
> > @@ -0,0 +1,906 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (C) 2021 Western Digital Corporation or its affiliates.
> > + * Copyright (C) 2022 Ventana Micro Systems Inc.
> > + */
> > +
> > +#define pr_fmt(fmt) "riscv-imsic: " fmt
> > +#include <linux/cpu.h>
> > +#include <linux/bitmap.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/irq.h>
> > +#include <linux/module.h>
> > +#include <linux/of.h>
> > +#include <linux/of_address.h>
> > +#include <linux/of_irq.h>
> > +#include <linux/seq_file.h>
> > +#include <linux/spinlock.h>
> > +#include <linux/smp.h>
> > +#include <asm/hwcap.h>
> > +
> > +#include "irq-riscv-imsic-state.h"
> > +
> > +#define IMSIC_DISABLE_EIDELIVERY             0
> > +#define IMSIC_ENABLE_EIDELIVERY                      1
> > +#define IMSIC_DISABLE_EITHRESHOLD            1
> > +#define IMSIC_ENABLE_EITHRESHOLD             0
> > +
> > +static inline void imsic_csr_write(unsigned long reg, unsigned long val)
> > +{
> > +     csr_write(CSR_ISELECT, reg);
> > +     csr_write(CSR_IREG, val);
> > +}
> > +
> > +static inline unsigned long imsic_csr_read(unsigned long reg)
> > +{
> > +     csr_write(CSR_ISELECT, reg);
> > +     return csr_read(CSR_IREG);
> > +}
> > +
> > +static inline unsigned long imsic_csr_read_clear(unsigned long reg, unsigned long val)
> > +{
> > +     csr_write(CSR_ISELECT, reg);
> > +     return csr_read_clear(CSR_IREG, val);
> > +}
> > +
> > +static inline void imsic_csr_set(unsigned long reg, unsigned long val)
> > +{
> > +     csr_write(CSR_ISELECT, reg);
> > +     csr_set(CSR_IREG, val);
> > +}
> > +
> > +static inline void imsic_csr_clear(unsigned long reg, unsigned long val)
> > +{
> > +     csr_write(CSR_ISELECT, reg);
> > +     csr_clear(CSR_IREG, val);
> > +}
> > +
> > +struct imsic_priv *imsic;
> > +
> > +const struct imsic_global_config *imsic_get_global_config(void)
> > +{
> > +     return imsic ? &imsic->global : NULL;
> > +}
> > +EXPORT_SYMBOL_GPL(imsic_get_global_config);
> > +
> > +static bool __imsic_eix_read_clear(unsigned long id, bool pend)
> > +{
> > +     unsigned long isel, imask;
> > +
> > +     isel = id / BITS_PER_LONG;
> > +     isel *= BITS_PER_LONG / IMSIC_EIPx_BITS;
> > +     isel += pend ? IMSIC_EIP0 : IMSIC_EIE0;
> > +     imask = BIT(id & (__riscv_xlen - 1));
> > +
> > +     return (imsic_csr_read_clear(isel, imask) & imask) ? true : false;
>
> Nit: use return !!(imsic_csr_read_clear(isel, imask) & imask)
>
> > +}
> > +
> > +static inline bool __imsic_id_read_clear_enabled(unsigned long id)
> > +{
> > +     return __imsic_eix_read_clear(id, false);
> > +}
> > +
> > +static inline bool __imsic_id_read_clear_pending(unsigned long id)
> > +{
> > +     return __imsic_eix_read_clear(id, true);
> > +}
> > +
> > +void __imsic_eix_update(unsigned long base_id, unsigned long num_id, bool pend, bool val)
> > +{
> > +     unsigned long id = base_id, last_id = base_id + num_id;
> > +     unsigned long i, isel, ireg;
> > +
> > +     while (id < last_id) {
> > +             isel = id / BITS_PER_LONG;
> > +             isel *= BITS_PER_LONG / IMSIC_EIPx_BITS;
> > +             isel += (pend) ? IMSIC_EIP0 : IMSIC_EIE0;
>
> Nit: Redundant parenthesis.
>
> > +
> > +             /*
> > +              * Prepare the ID mask to be programmed in the
> > +              * IMSIC EIEx and EIPx registers. These registers
> > +              * are XLEN-wide and we must not touch IDs which
> > +              * are < base_id and >= (base_id + num_id).
> > +              */
> > +             ireg = 0;
> > +             for (i = id & (__riscv_xlen - 1); (id < last_id) && (i < __riscv_xlen); i++) {
>
> Nit: Redundant parenthesis "(id < last_id) && (i < __riscv_xlen)", which
> is also inconsistent with other usage in this changeset.
>
> > +                     ireg |= BIT(i);
> > +                     id++;
> > +             }
> > +
> > +             /*
> > +              * The IMSIC EIEx and EIPx registers are indirectly
> > +              * accessed via using ISELECT and IREG CSRs so we
> > +              * need to access these CSRs without getting preempted.
> > +              *
> > +              * All existing users of this function call this
> > +              * function with local IRQs disabled so we don't
> > +              * need to do anything special here.
> > +              */
> > +             if (val)
> > +                     imsic_csr_set(isel, ireg);
> > +             else
> > +                     imsic_csr_clear(isel, ireg);
> > +     }
> > +}
> > +
> > +/* MUST be called with lpriv->lock held */
> > +static void __imsic_local_sync(struct imsic_local_priv *lpriv)
> > +{
> > +     struct imsic_local_config *mlocal;
> > +     struct imsic_vector *vec, *mvec;
> > +     int i;
> > +
> > +     /* This pairs with the barrier in __imsic_remote_sync(). */
> > +     smp_mb();
> > +
> > +     for_each_set_bit(i, lpriv->dirty_bitmap, imsic->global.nr_ids + 1) {
> > +             if (!i || i == IMSIC_IPI_ID)
> > +                     goto skip;
> > +             vec = &lpriv->vectors[i];
> > +
> > +             if (vec->enable)
> > +                     __imsic_id_set_enable(i);
> > +             else
> > +                     __imsic_id_clear_enable(i);
> > +
> > +             /*
> > +              * If the ID was being moved to a new ID on some other CPU
> > +              * then we can get a MSI during the movement so check the
> > +              * ID pending bit and re-trigger the new ID on other CPU
> > +              * using MMIO write.
> > +              */
> > +             mvec = vec->move;
> > +             vec->move = NULL;
> > +             if (mvec && mvec != vec) {
> > +                     if (__imsic_id_read_clear_pending(i)) {
> > +                             mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu);
> > +                             writel_relaxed(mvec->local_id, mlocal->msi_va);
> > +                     }
> > +
> > +                     imsic_vector_free(&lpriv->vectors[i]);
> > +             }
> > +
> > +skip:
> > +             bitmap_clear(lpriv->dirty_bitmap, i, 1);
> > +     }
> > +}
> > +
> > +void imsic_local_sync_all(void)
> > +{
> > +     struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> > +     unsigned long flags;
> > +
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +     bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1);
> > +     __imsic_local_sync(lpriv);
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> > +}
> > +
> > +void imsic_local_delivery(bool enable)
> > +{
> > +     if (enable) {
> > +             imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_ENABLE_EITHRESHOLD);
> > +             imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_ENABLE_EIDELIVERY);
> > +             return;
> > +     }
> > +
> > +     imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_DISABLE_EIDELIVERY);
> > +     imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_DISABLE_EITHRESHOLD);
> > +}
> > +
> > +#ifdef CONFIG_SMP
> > +static void imsic_local_timer_callback(struct timer_list *timer)
> > +{
> > +     struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> > +     unsigned long flags;
> > +
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +     __imsic_local_sync(lpriv);
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> > +}
> > +
> > +/* MUST be called with lpriv->lock held */
> > +static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)
> > +{
> > +     /*
> > +      * Ensure that changes to vector enable, vector move and
> > +      * dirty bitmap are visible to the target CPU.
> > +      *
> > +      * This pairs with the barrier in __imsic_local_sync().
> > +      */
> > +     smp_mb();
> > +
> > +     /*
> > +      * We schedule a timer on the target CPU if the target CPU is not
> > +      * same as the current CPU. An offline CPU will unconditionally
> > +      * synchronize IDs through imsic_starting_cpu() when the
> > +      * CPU is brought up.
> > +      */
> > +     if (cpu_online(cpu)) {
> > +             if (cpu != smp_processor_id()) {
> > +                     if (!timer_pending(&lpriv->timer)) {
> > +                             lpriv->timer.expires = jiffies + 1;
> > +                             add_timer_on(&lpriv->timer, cpu);
> > +                     }
> > +             } else {
> > +                     __imsic_local_sync(lpriv);
> > +             }
>
> Nit: Early exit/return vs else-clause for readability
>
>
> > +     }
> > +}
> > +#else
> > +/* MUST be called with lpriv->lock held */
> > +static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)
> > +{
> > +     __imsic_local_sync(lpriv);
> > +}
> > +#endif
> > +
> > +void imsic_vector_mask(struct imsic_vector *vec)
> > +{
> > +     struct imsic_local_priv *lpriv;
> > +
> > +     lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> > +     if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> > +             return;
> > +
> > +     /*
> > +      * This function is called through Linux irq subsystem with
> > +      * irqs disabled so no need to save/restore irq flags.
> > +      */
> > +
> > +     raw_spin_lock(&lpriv->lock);
> > +
> > +     vec->enable = false;
> > +     bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
> > +     __imsic_remote_sync(lpriv, vec->cpu);
> > +
> > +     raw_spin_unlock(&lpriv->lock);
> > +}
>
> Really nice that you're using a timer for the vector affinity change,
> and got rid of the special/weird IMSIC/sync IPI. Can you really use a
> timer for mask/unmask? That makes the mask/unmask operation
> asynchronous!
>
> That was what I was trying to get though with this comment:
> https://lore.kernel.org/linux-riscv/87sf24mo1g.fsf@all.your.base.are.belong.to.us/
>
> Also, using the smp_* IPI functions, you can pass arguments, so you
> don't need the dirty_bitmap tracking the changes.

The mask/unmask operations are called with irqs disabled so if
CPU X does synchronous IPI to another CPU Y from mask/unmask
operation then while CPU X is waiting for IPI to complete it cannot
receive IPI from other CPUs which can lead to crashes and stalls.

In general, we should not do any block/busy-wait work in
mask/unmask operation of an irqchip driver.

The AIA IMSIC spec allows setting ID pending bit using MSI write
irrespective whether ID is enabled or not but the interrupt will be
taken only after ID is enabled. In other words, there will be no
loss of interrupt with delayed mask/unmask using async IPI or
lazy timer.

>
> > +
> > +void imsic_vector_unmask(struct imsic_vector *vec)
> > +{
> > +     struct imsic_local_priv *lpriv;
> > +
> > +     lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> > +     if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> > +             return;
> > +
> > +     /*
> > +      * This function is called through Linux irq subsystem with
> > +      * irqs disabled so no need to save/restore irq flags.
> > +      */
> > +
> > +     raw_spin_lock(&lpriv->lock);
> > +
> > +     vec->enable = true;
> > +     bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
> > +     __imsic_remote_sync(lpriv, vec->cpu);
> > +
> > +     raw_spin_unlock(&lpriv->lock);
> > +}
> > +
> > +
> > +bool imsic_vector_isenabled(struct imsic_vector *vec)
> > +{
> > +     struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> > +     unsigned long flags;
> > +     bool ret;
> > +
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +     ret = vec->enable;
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> > +
> > +     return ret;
> > +}
> > +
> > +struct imsic_vector *imsic_vector_get_move(struct imsic_vector *vec)
> > +{
> > +     struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> > +     struct imsic_vector *ret;
> > +     unsigned long flags;
> > +
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +     ret = vec->move;
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> > +
> > +     return ret;
> > +}
> > +
> > +static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, struct imsic_vector *vec,
> > +                                  bool new_enable, struct imsic_vector *new_move)
> > +{
> > +     unsigned long flags;
> > +     bool enabled;
> > +
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +
> > +     /* Update enable and move details */
> > +     enabled = vec->enable;
> > +     vec->enable = new_enable;
> > +     vec->move = new_move;
> > +
> > +     /* Mark the vector as dirty and synchronize */
> > +     bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
> > +     __imsic_remote_sync(lpriv, vec->cpu);
> > +
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> > +
> > +     return enabled;
> > +}
> > +
> > +void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_vec)
> > +{
> > +     struct imsic_local_priv *old_lpriv, *new_lpriv;
> > +     bool enabled;
> > +
> > +     if (WARN_ON(old_vec->cpu == new_vec->cpu))
> > +             return;
> > +
> > +     old_lpriv = per_cpu_ptr(imsic->lpriv, old_vec->cpu);
> > +     if (WARN_ON(&old_lpriv->vectors[old_vec->local_id] != old_vec))
> > +             return;
> > +
> > +     new_lpriv = per_cpu_ptr(imsic->lpriv, new_vec->cpu);
> > +     if (WARN_ON(&new_lpriv->vectors[new_vec->local_id] != new_vec))
> > +             return;
> > +
> > +     /*
> > +      * Move and re-trigger the new vector based on the pending
> > +      * state of the old vector because we might get a device
> > +      * interrupt on the old vector while device was being moved
> > +      * to the new vector.
> > +      */
> > +     enabled = imsic_vector_move_update(old_lpriv, old_vec, false, new_vec);
> > +     imsic_vector_move_update(new_lpriv, new_vec, enabled, new_vec);
> > +}
> > +
> > +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
> > +void imsic_vector_debug_show(struct seq_file *m, struct imsic_vector *vec, int ind)
> > +{
> > +     struct imsic_local_priv *lpriv;
> > +     struct imsic_vector *mvec;
> > +     bool is_enabled;
> > +
> > +     lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> > +     if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> > +             return;
> > +
> > +     is_enabled = imsic_vector_isenabled(vec);
> > +     mvec = imsic_vector_get_move(vec);
> > +
> > +     seq_printf(m, "%*starget_cpu      : %5u\n", ind, "", vec->cpu);
> > +     seq_printf(m, "%*starget_local_id : %5u\n", ind, "", vec->local_id);
> > +     seq_printf(m, "%*sis_reserved     : %5u\n", ind, "",
> > +                (vec->local_id <= IMSIC_IPI_ID) ? 1 : 0);
> > +     seq_printf(m, "%*sis_enabled      : %5u\n", ind, "", (is_enabled) ? 1 : 0);
> > +     seq_printf(m, "%*sis_move_pending : %5u\n", ind, "", (mvec) ? 1 : 0);
>
> Nit: Redundant parenthesis.
>
> > +     if (mvec) {
> > +             seq_printf(m, "%*smove_cpu        : %5u\n", ind, "", mvec->cpu);
> > +             seq_printf(m, "%*smove_local_id   : %5u\n", ind, "", mvec->local_id);
> > +     }
> > +}
> > +
> > +void imsic_vector_debug_show_summary(struct seq_file *m, int ind)
> > +{
> > +     irq_matrix_debug_show(m, imsic->matrix, ind);
> > +}
> > +#endif
> > +
> > +struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int local_id)
> > +{
> > +     struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> > +
> > +     if (!lpriv || imsic->global.nr_ids < local_id)
> > +             return NULL;
> > +
> > +     return &lpriv->vectors[local_id];
> > +}
> > +
> > +struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask *mask)
> > +{
> > +     struct imsic_vector *vec = NULL;
> > +     struct imsic_local_priv *lpriv;
> > +     unsigned long flags;
> > +     unsigned int cpu;
> > +     int local_id;
> > +
> > +     raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> > +     local_id = irq_matrix_alloc(imsic->matrix, mask, false, &cpu);
> > +     raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> > +     if (local_id < 0)
> > +             return NULL;
> > +
> > +     lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> > +     vec = &lpriv->vectors[local_id];
> > +     vec->hwirq = hwirq;
> > +     vec->enable = false;
> > +     vec->move = NULL;
> > +
> > +     return vec;
> > +}
> > +
> > +void imsic_vector_free(struct imsic_vector *vec)
> > +{
> > +     unsigned long flags;
> > +
> > +     raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> > +     vec->hwirq = UINT_MAX;
> > +     irq_matrix_free(imsic->matrix, vec->cpu, vec->local_id, false);
> > +     raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> > +}
> > +
> > +static void __init imsic_local_cleanup(void)
> > +{
> > +     int cpu;
> > +     struct imsic_local_priv *lpriv;
> > +
> > +     for_each_possible_cpu(cpu) {
> > +             lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> > +
> > +             bitmap_free(lpriv->dirty_bitmap);
> > +             kfree(lpriv->vectors);
> > +     }
> > +
> > +     free_percpu(imsic->lpriv);
> > +}
> > +
> > +static int __init imsic_local_init(void)
> > +{
> > +     struct imsic_global_config *global = &imsic->global;
> > +     struct imsic_local_priv *lpriv;
> > +     struct imsic_vector *vec;
> > +     int cpu, i;
> > +
> > +     /* Allocate per-CPU private state */
> > +     imsic->lpriv = alloc_percpu(typeof(*(imsic->lpriv)));
> > +     if (!imsic->lpriv)
> > +             return -ENOMEM;
> > +
> > +     /* Setup per-CPU private state */
> > +     for_each_possible_cpu(cpu) {
> > +             lpriv = per_cpu_ptr(imsic->lpriv, cpu);
> > +
> > +             raw_spin_lock_init(&lpriv->lock);
> > +
> > +             /* Allocate dirty bitmap */
> > +             lpriv->dirty_bitmap = bitmap_zalloc(global->nr_ids + 1, GFP_KERNEL);
> > +             if (!lpriv->dirty_bitmap)
> > +                     goto fail_local_cleanup;
> > +
> > +#ifdef CONFIG_SMP
> > +             /* Setup lazy timer for synchronization */
> > +             timer_setup(&lpriv->timer, imsic_local_timer_callback, TIMER_PINNED);
> > +#endif
> > +
> > +             /* Allocate vector array */
> > +             lpriv->vectors = kcalloc(global->nr_ids + 1, sizeof(*lpriv->vectors),
> > +                                      GFP_KERNEL);
> > +             if (!lpriv->vectors)
> > +                     goto fail_local_cleanup;
> > +
> > +             /* Setup vector array */
> > +             for (i = 0; i <= global->nr_ids; i++) {
> > +                     vec = &lpriv->vectors[i];
> > +                     vec->cpu = cpu;
> > +                     vec->local_id = i;
> > +                     vec->hwirq = UINT_MAX;
> > +             }
> > +     }
> > +
> > +     return 0;
> > +
> > +fail_local_cleanup:
> > +     imsic_local_cleanup();
> > +     return -ENOMEM;
> > +}
> > +
> > +void imsic_state_online(void)
> > +{
> > +     unsigned long flags;
> > +
> > +     raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> > +     irq_matrix_online(imsic->matrix);
> > +     raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> > +}
> > +
> > +void imsic_state_offline(void)
> > +{
> > +#ifdef CONFIG_SMP
> > +     struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> > +#endif
> > +     unsigned long flags;
> > +
> > +     raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> > +     irq_matrix_offline(imsic->matrix);
> > +     raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> > +
> > +#ifdef CONFIG_SMP
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +     WARN_ON_ONCE(try_to_del_timer_sync(&lpriv->timer) < 0);
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> > +#endif
> > +}
> > +
> > +static int __init imsic_matrix_init(void)
> > +{
> > +     struct imsic_global_config *global = &imsic->global;
> > +
> > +     raw_spin_lock_init(&imsic->matrix_lock);
> > +     imsic->matrix = irq_alloc_matrix(global->nr_ids + 1,
> > +                                      0, global->nr_ids + 1);
> > +     if (!imsic->matrix)
> > +             return -ENOMEM;
> > +
> > +     /* Reserve ID#0 because it is special and never implemented */
> > +     irq_matrix_assign_system(imsic->matrix, 0, false);
> > +
> > +     /* Reserve IPI ID because it is special and used internally */
> > +     irq_matrix_assign_system(imsic->matrix, IMSIC_IPI_ID, false);
> > +
> > +     return 0;
> > +}
> > +
> > +static int __init imsic_get_parent_hartid(struct fwnode_handle *fwnode,
> > +                                       u32 index, unsigned long *hartid)
> > +{
> > +     struct of_phandle_args parent;
> > +     int rc;
> > +
> > +     /*
> > +      * Currently, only OF fwnode is supported so extend this
> > +      * function for ACPI support.
> > +      */
> > +     if (!is_of_node(fwnode))
> > +             return -EINVAL;
> > +
> > +     rc = of_irq_parse_one(to_of_node(fwnode), index, &parent);
> > +     if (rc)
> > +             return rc;
> > +
> > +     /*
> > +      * Skip interrupts other than external interrupts for
> > +      * current privilege level.
> > +      */
> > +     if (parent.args[0] != RV_IRQ_EXT)
> > +             return -EINVAL;
> > +
> > +     return riscv_of_parent_hartid(parent.np, hartid);
> > +}
> > +
> > +static int __init imsic_get_mmio_resource(struct fwnode_handle *fwnode,
> > +                                       u32 index, struct resource *res)
> > +{
> > +     /*
> > +      * Currently, only OF fwnode is supported so extend this
> > +      * function for ACPI support.
> > +      */
> > +     if (!is_of_node(fwnode))
> > +             return -EINVAL;
> > +
> > +     return of_address_to_resource(to_of_node(fwnode), index, res);
> > +}
> > +
> > +static int __init imsic_parse_fwnode(struct fwnode_handle *fwnode,
> > +                                  struct imsic_global_config *global,
> > +                                  u32 *nr_parent_irqs,
> > +                                  u32 *nr_mmios)
> > +{
> > +     unsigned long hartid;
> > +     struct resource res;
> > +     int rc;
> > +     u32 i;
> > +
> > +     /*
> > +      * Currently, only OF fwnode is supported so extend this
> > +      * function for ACPI support.
> > +      */
> > +     if (!is_of_node(fwnode))
> > +             return -EINVAL;
> > +
> > +     *nr_parent_irqs = 0;
> > +     *nr_mmios = 0;
> > +
> > +     /* Find number of parent interrupts */
> > +     while (!imsic_get_parent_hartid(fwnode, *nr_parent_irqs, &hartid))
> > +             (*nr_parent_irqs)++;
> > +     if (!(*nr_parent_irqs)) {
>
> Nit: Redundant parenthesis
>
> > +             pr_err("%pfwP: no parent irqs available\n", fwnode);
> > +             return -EINVAL;
> > +     }
> > +
> > +     /* Find number of guest index bits in MSI address */
> > +     rc = of_property_read_u32(to_of_node(fwnode), "riscv,guest-index-bits",
> > +                               &global->guest_index_bits);
> > +     if (rc)
> > +             global->guest_index_bits = 0;
> > +
> > +     /* Find number of HART index bits */
> > +     rc = of_property_read_u32(to_of_node(fwnode), "riscv,hart-index-bits",
> > +                               &global->hart_index_bits);
> > +     if (rc) {
> > +             /* Assume default value */
> > +             global->hart_index_bits = __fls(*nr_parent_irqs);
> > +             if (BIT(global->hart_index_bits) < *nr_parent_irqs)
> > +                     global->hart_index_bits++;
> > +     }
> > +
> > +     /* Find number of group index bits */
> > +     rc = of_property_read_u32(to_of_node(fwnode), "riscv,group-index-bits",
> > +                               &global->group_index_bits);
> > +     if (rc)
> > +             global->group_index_bits = 0;
> > +
> > +     /*
> > +      * Find first bit position of group index.
> > +      * If not specified assumed the default APLIC-IMSIC configuration.
> > +      */
> > +     rc = of_property_read_u32(to_of_node(fwnode), "riscv,group-index-shift",
> > +                               &global->group_index_shift);
> > +     if (rc)
> > +             global->group_index_shift = IMSIC_MMIO_PAGE_SHIFT * 2;
> > +
> > +     /* Find number of interrupt identities */
> > +     rc = of_property_read_u32(to_of_node(fwnode), "riscv,num-ids",
> > +                               &global->nr_ids);
> > +     if (rc) {
> > +             pr_err("%pfwP: number of interrupt identities not found\n",
> > +                     fwnode);
> > +             return rc;
> > +     }
> > +
> > +     /* Find number of guest interrupt identities */
> > +     rc = of_property_read_u32(to_of_node(fwnode), "riscv,num-guest-ids",
> > +                               &global->nr_guest_ids);
> > +     if (rc)
> > +             global->nr_guest_ids = global->nr_ids;
> > +
> > +     /* Sanity check guest index bits */
> > +     i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT;
> > +     if (i < global->guest_index_bits) {
> > +             pr_err("%pfwP: guest index bits too big\n", fwnode);
> > +             return -EINVAL;
> > +     }
> > +
> > +     /* Sanity check HART index bits */
> > +     i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT - global->guest_index_bits;
> > +     if (i < global->hart_index_bits) {
> > +             pr_err("%pfwP: HART index bits too big\n", fwnode);
> > +             return -EINVAL;
> > +     }
> > +
> > +     /* Sanity check group index bits */
> > +     i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT -
> > +         global->guest_index_bits - global->hart_index_bits;
> > +     if (i < global->group_index_bits) {
> > +             pr_err("%pfwP: group index bits too big\n", fwnode);
> > +             return -EINVAL;
> > +     }
> > +
> > +     /* Sanity check group index shift */
> > +     i = global->group_index_bits + global->group_index_shift - 1;
> > +     if (i >= BITS_PER_LONG) {
> > +             pr_err("%pfwP: group index shift too big\n", fwnode);
> > +             return -EINVAL;
> > +     }
> > +
> > +     /* Sanity check number of interrupt identities */
> > +     if ((global->nr_ids < IMSIC_MIN_ID) ||
> > +         (global->nr_ids >= IMSIC_MAX_ID) ||
> > +         ((global->nr_ids & IMSIC_MIN_ID) != IMSIC_MIN_ID)) {
> > +             pr_err("%pfwP: invalid number of interrupt identities\n",
> > +                     fwnode);
>
> Nit: 100 chars
>
> > +             return -EINVAL;
> > +     }
> > +
> > +     /* Sanity check number of guest interrupt identities */
> > +     if ((global->nr_guest_ids < IMSIC_MIN_ID) ||
> > +         (global->nr_guest_ids >= IMSIC_MAX_ID) ||
> > +         ((global->nr_guest_ids & IMSIC_MIN_ID) != IMSIC_MIN_ID)) {
> > +             pr_err("%pfwP: invalid number of guest interrupt identities\n",
> > +                     fwnode);
>
> Nit: 100 chars
>
> > +             return -EINVAL;
> > +     }
> > +
> > +     /* Compute base address */
> > +     rc = imsic_get_mmio_resource(fwnode, 0, &res);
> > +     if (rc) {
> > +             pr_err("%pfwP: first MMIO resource not found\n", fwnode);
> > +             return -EINVAL;
> > +     }
> > +     global->base_addr = res.start;
> > +     global->base_addr &= ~(BIT(global->guest_index_bits +
> > +                                global->hart_index_bits +
> > +                                IMSIC_MMIO_PAGE_SHIFT) - 1);
> > +     global->base_addr &= ~((BIT(global->group_index_bits) - 1) <<
> > +                            global->group_index_shift);
> > +
> > +     /* Find number of MMIO register sets */
> > +     while (!imsic_get_mmio_resource(fwnode, *nr_mmios, &res))
> > +             (*nr_mmios)++;
> > +
> > +     return 0;
> > +}
> > +
> > +int __init imsic_setup_state(struct fwnode_handle *fwnode)
> > +{
> > +     u32 i, j, index, nr_parent_irqs, nr_mmios, nr_handlers = 0;
> > +     struct imsic_global_config *global;
> > +     struct imsic_local_config *local;
> > +     void __iomem **mmios_va = NULL;
> > +     struct resource *mmios = NULL;
> > +     unsigned long reloff, hartid;
> > +     phys_addr_t base_addr;
> > +     int rc, cpu;
> > +
> > +     /*
> > +      * Only one IMSIC instance allowed in a platform for clean
> > +      * implementation of SMP IRQ affinity and per-CPU IPIs.
> > +      *
> > +      * This means on a multi-socket (or multi-die) platform we
> > +      * will have multiple MMIO regions for one IMSIC instance.
> > +      */
> > +     if (imsic) {
> > +             pr_err("%pfwP: already initialized hence ignoring\n",
> > +                     fwnode);
>
> Nit: 100 chars
>
> > +             return -EALREADY;
> > +     }
> > +
> > +     if (!riscv_isa_extension_available(NULL, SxAIA)) {
> > +             pr_err("%pfwP: AIA support not available\n", fwnode);
> > +             return -ENODEV;
> > +     }
> > +
> > +     imsic = kzalloc(sizeof(*imsic), GFP_KERNEL);
> > +     if (!imsic)
> > +             return -ENOMEM;
> > +     imsic->fwnode = fwnode;
> > +     global = &imsic->global;
> > +
> > +     global->local = alloc_percpu(typeof(*(global->local)));
> > +     if (!global->local) {
> > +             rc = -ENOMEM;
> > +             goto out_free_priv;
> > +     }
> > +
> > +     /* Parse IMSIC fwnode */
> > +     rc = imsic_parse_fwnode(fwnode, global, &nr_parent_irqs, &nr_mmios);
> > +     if (rc)
> > +             goto out_free_local;
> > +
> > +     /* Allocate MMIO resource array */
> > +     mmios = kcalloc(nr_mmios, sizeof(*mmios), GFP_KERNEL);
> > +     if (!mmios) {
> > +             rc = -ENOMEM;
> > +             goto out_free_local;
> > +     }
> > +
> > +     /* Allocate MMIO virtual address array */
> > +     mmios_va = kcalloc(nr_mmios, sizeof(*mmios_va), GFP_KERNEL);
> > +     if (!mmios_va) {
> > +             rc = -ENOMEM;
> > +             goto out_iounmap;
> > +     }
> > +
> > +     /* Parse and map MMIO register sets */
> > +     for (i = 0; i < nr_mmios; i++) {
> > +             rc = imsic_get_mmio_resource(fwnode, i, &mmios[i]);
> > +             if (rc) {
> > +                     pr_err("%pfwP: unable to parse MMIO regset %d\n",
> > +                             fwnode, i);
>
> Nit: 100 chars
>
> > +                     goto out_iounmap;
> > +             }
> > +
> > +             base_addr = mmios[i].start;
> > +             base_addr &= ~(BIT(global->guest_index_bits +
> > +                                global->hart_index_bits +
> > +                                IMSIC_MMIO_PAGE_SHIFT) - 1);
> > +             base_addr &= ~((BIT(global->group_index_bits) - 1) <<
> > +                            global->group_index_shift);
> > +             if (base_addr != global->base_addr) {
> > +                     rc = -EINVAL;
> > +                     pr_err("%pfwP: address mismatch for regset %d\n",
> > +                             fwnode, i);
>
> Nit: 100 chars... and all the places below where applicable.
>
>
> Björn

Regards,
Anup
  
Anup Patel Feb. 20, 2024, 1:15 p.m. UTC | #4
On Tue, Feb 20, 2024 at 5:23 PM Björn Töpel <bjorn@kernelorg> wrote:
>
> Anup Patel <apatel@ventanamicro.com> writes:
>
> > The RISC-V advanced interrupt architecture (AIA) specification
> > defines a new MSI controller called incoming message signalled
> > interrupt controller (IMSIC) which manages MSI on per-HART (or
> > per-CPU) basis. It also supports IPIs as software injected MSIs.
> > (For more details refer https://github.com/riscv/riscv-aia)
> >
> > Let us add an early irqchip driver for RISC-V IMSIC which sets
> > up the IMSIC state and provide IPIs.
> >
> > Signed-off-by: Anup Patel <apatel@ventanamicro.com>
>
> This patch has a couple of checkpatch issues:
>
> CHECK: Alignment should match open parenthesis
> CHECK: Please don't use multiple blank lines
> CHECK: Please use a blank line after function/struct/union/enum declarations
> CHECK: Unnecessary parentheses around 'global->nr_guest_ids < IMSIC_MIN_ID'
> CHECK: Unnecessary parentheses around 'global->nr_guest_ids >= IMSIC_MAX_ID'
> CHECK: Unnecessary parentheses around 'global->nr_ids < IMSIC_MIN_ID'
> CHECK: Unnecessary parentheses around 'global->nr_ids >= IMSIC_MAX_ID'
> CHECK: Unnecessary parentheses around global->local
> CHECK: Unnecessary parentheses around imsic->lpriv
> CHECK: extern prototypes should be avoided in .h files
>

Okay, I will address these in the next revision.

Regards,
Anup
  
Thomas Gleixner Feb. 20, 2024, 1:15 p.m. UTC | #5
On Tue, Feb 20 2024 at 11:37, Anup Patel wrote:
> The RISC-V advanced interrupt architecture (AIA) specification
> defines a new MSI controller called incoming message signalled
> interrupt controller (IMSIC) which manages MSI on per-HART (or
> per-CPU) basis. It also supports IPIs as software injected MSIs.
> (For more details refer https://github.com/riscv/riscv-aia)
>
> Let us add an early irqchip driver for RISC-V IMSIC which sets
> up the IMSIC state and provide IPIs.

s/Let us add/Add/

> +#else
> +static void imsic_ipi_starting_cpu(void)
> +{
> +}
> +
> +static void imsic_ipi_dying_cpu(void)
> +{
> +}
> +
> +static int __init imsic_ipi_domain_init(void)
> +{
> +	return 0;
> +}

Please condense this into

static void imsic_ipi_starting_cpu(void) { }
static void imsic_ipi_dying_cpu(void) { }
static int __init imsic_ipi_domain_init(void) { return 0; }

No point in wasting space for these stubs.

> + * To handle an interrupt, we read the TOPEI CSR and write zero in one
> + * instruction. If TOPEI CSR is non-zero then we translate TOPEI.ID to
> + * Linux interrupt number and let Linux IRQ subsystem handle it.
> + */
> +static void imsic_handle_irq(struct irq_desc *desc)
> +{
> +	struct irq_chip *chip = irq_desc_get_chip(desc);
> +	int err, cpu = smp_processor_id();
> +	struct imsic_vector *vec;
> +	unsigned long local_id;
> +
> +	chained_irq_enter(chip, desc);
> +
> +	while ((local_id = csr_swap(CSR_TOPEI, 0))) {
> +		local_id = local_id >> TOPEI_ID_SHIFT;
> +
> +		if (local_id == IMSIC_IPI_ID) {
> +#ifdef CONFIG_SMP

	if (IS_ENABLED(CONFIG_SMP))

> +			ipi_mux_process();
> +#endif
> +			continue;
> +		}

> +
> +/* MUST be called with lpriv->lock held */

Instead of a comment which cannot be enforced just have

        lockdep_assert_held(&lpriv->lock);

right at the top of the function. That documents the requirement and
lets lockdep yell if not followed.

> +#ifdef CONFIG_SMP
> +static void imsic_local_timer_callback(struct timer_list *timer)
> +{
> +	struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +	__imsic_local_sync(lpriv);
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> +}
> +
> +/* MUST be called with lpriv->lock held */

Ditto

> +static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)

> +void imsic_vector_mask(struct imsic_vector *vec)
> +{
> +	struct imsic_local_priv *lpriv;
> +
> +	lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> +	if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> +		return;

WARN_ON_ONCE(), no?

> +bool imsic_vector_isenabled(struct imsic_vector *vec)
> +{
> +	struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> +	unsigned long flags;
> +	bool ret;
> +
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +	ret = vec->enable;
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);

I'm not sure what you are trying to protect here. vec->enable can
obviously change right after the lock is dropped. So that's just a
snapshot, which is not any better than using

          READ_ONCE(vec->enable);

and a corresponding WRITE_ONCE() at the update site, which obviously
needs serialization.

> +static void __init imsic_local_cleanup(void)
> +{
> +	int cpu;
> +	struct imsic_local_priv *lpriv;

        struct imsic_local_priv *lpriv;
	int cpu;

Please.

> +void imsic_state_offline(void)
> +{
> +#ifdef CONFIG_SMP
> +	struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> +#endif

You can move that into the #ifdef below.

> +	unsigned long flags;
> +
> +	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> +	irq_matrix_offline(imsic->matrix);
> +	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> +
> +#ifdef CONFIG_SMP
> +	raw_spin_lock_irqsave(&lpriv->lock, flags);
> +	WARN_ON_ONCE(try_to_del_timer_sync(&lpriv->timer) < 0);
> +	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> +#endif
> +}


Thanks,

        tglx
  
Anup Patel Feb. 20, 2024, 4:33 p.m. UTC | #6
On Tue, Feb 20, 2024 at 6:45 PM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Tue, Feb 20 2024 at 11:37, Anup Patel wrote:
> > The RISC-V advanced interrupt architecture (AIA) specification
> > defines a new MSI controller called incoming message signalled
> > interrupt controller (IMSIC) which manages MSI on per-HART (or
> > per-CPU) basis. It also supports IPIs as software injected MSIs.
> > (For more details refer https://github.com/riscv/riscv-aia)
> >
> > Let us add an early irqchip driver for RISC-V IMSIC which sets
> > up the IMSIC state and provide IPIs.
>
> s/Let us add/Add/

Okay, I will update.

>
> > +#else
> > +static void imsic_ipi_starting_cpu(void)
> > +{
> > +}
> > +
> > +static void imsic_ipi_dying_cpu(void)
> > +{
> > +}
> > +
> > +static int __init imsic_ipi_domain_init(void)
> > +{
> > +     return 0;
> > +}
>
> Please condense this into
>
> static void imsic_ipi_starting_cpu(void) { }
> static void imsic_ipi_dying_cpu(void) { }
> static int __init imsic_ipi_domain_init(void) { return 0; }
>
> No point in wasting space for these stubs.

Sure, I will update.

>
> > + * To handle an interrupt, we read the TOPEI CSR and write zero in one
> > + * instruction. If TOPEI CSR is non-zero then we translate TOPEI.ID to
> > + * Linux interrupt number and let Linux IRQ subsystem handle it.
> > + */
> > +static void imsic_handle_irq(struct irq_desc *desc)
> > +{
> > +     struct irq_chip *chip = irq_desc_get_chip(desc);
> > +     int err, cpu = smp_processor_id();
> > +     struct imsic_vector *vec;
> > +     unsigned long local_id;
> > +
> > +     chained_irq_enter(chip, desc);
> > +
> > +     while ((local_id = csr_swap(CSR_TOPEI, 0))) {
> > +             local_id = local_id >> TOPEI_ID_SHIFT;
> > +
> > +             if (local_id == IMSIC_IPI_ID) {
> > +#ifdef CONFIG_SMP
>
>         if (IS_ENABLED(CONFIG_SMP))

Okay, I will update.

>
> > +                     ipi_mux_process();
> > +#endif
> > +                     continue;
> > +             }
>
> > +
> > +/* MUST be called with lpriv->lock held */
>
> Instead of a comment which cannot be enforced just have
>
>         lockdep_assert_held(&lpriv->lock);
>
> right at the top of the function. That documents the requirement and
> lets lockdep yell if not followed.
>
> > +#ifdef CONFIG_SMP
> > +static void imsic_local_timer_callback(struct timer_list *timer)
> > +{
> > +     struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> > +     unsigned long flags;
> > +
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +     __imsic_local_sync(lpriv);
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> > +}
> > +
> > +/* MUST be called with lpriv->lock held */
>
> Ditto
>
> > +static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)
>
> > +void imsic_vector_mask(struct imsic_vector *vec)
> > +{
> > +     struct imsic_local_priv *lpriv;
> > +
> > +     lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> > +     if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> > +             return;
>
> WARN_ON_ONCE(), no?
>
> > +bool imsic_vector_isenabled(struct imsic_vector *vec)
> > +{
> > +     struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> > +     unsigned long flags;
> > +     bool ret;
> > +
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +     ret = vec->enable;
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
>
> I'm not sure what you are trying to protect here. vec->enable can
> obviously change right after the lock is dropped. So that's just a
> snapshot, which is not any better than using
>
>           READ_ONCE(vec->enable);
>
> and a corresponding WRITE_ONCE() at the update site, which obviously
> needs serialization.
>
> > +static void __init imsic_local_cleanup(void)
> > +{
> > +     int cpu;
> > +     struct imsic_local_priv *lpriv;
>
>         struct imsic_local_priv *lpriv;
>         int cpu;
>
> Please.
>
> > +void imsic_state_offline(void)
> > +{
> > +#ifdef CONFIG_SMP
> > +     struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
> > +#endif
>
> You can move that into the #ifdef below.
>
> > +     unsigned long flags;
> > +
> > +     raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
> > +     irq_matrix_offline(imsic->matrix);
> > +     raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
> > +
> > +#ifdef CONFIG_SMP
> > +     raw_spin_lock_irqsave(&lpriv->lock, flags);
> > +     WARN_ON_ONCE(try_to_del_timer_sync(&lpriv->timer) < 0);
> > +     raw_spin_unlock_irqrestore(&lpriv->lock, flags);
> > +#endif
> > +}
>
>
> Thanks,
>
>         tglx
  
Björn Töpel Feb. 21, 2024, 11:59 a.m. UTC | #7
Anup Patel <apatel@ventanamicro.com> writes:

>> > +void imsic_vector_mask(struct imsic_vector *vec)
>> > +{
>> > +     struct imsic_local_priv *lpriv;
>> > +
>> > +     lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
>> > +     if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
>> > +             return;
>> > +
>> > +     /*
>> > +      * This function is called through Linux irq subsystem with
>> > +      * irqs disabled so no need to save/restore irq flags.
>> > +      */
>> > +
>> > +     raw_spin_lock(&lpriv->lock);
>> > +
>> > +     vec->enable = false;
>> > +     bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
>> > +     __imsic_remote_sync(lpriv, vec->cpu);
>> > +
>> > +     raw_spin_unlock(&lpriv->lock);
>> > +}
>>
>> Really nice that you're using a timer for the vector affinity change,
>> and got rid of the special/weird IMSIC/sync IPI. Can you really use a
>> timer for mask/unmask? That makes the mask/unmask operation
>> asynchronous!
>>
>> That was what I was trying to get though with this comment:
>> https://lore.kernel.org/linux-riscv/87sf24mo1g.fsf@all.your.base.are.belong.to.us/
>>
>> Also, using the smp_* IPI functions, you can pass arguments, so you
>> don't need the dirty_bitmap tracking the changes.
>
> The mask/unmask operations are called with irqs disabled so if
> CPU X does synchronous IPI to another CPU Y from mask/unmask
> operation then while CPU X is waiting for IPI to complete it cannot
> receive IPI from other CPUs which can lead to crashes and stalls.
>
> In general, we should not do any block/busy-wait work in
> mask/unmask operation of an irqchip driver.

Hmm, OK. Still, a bit odd that when the .irq_mask callback return, the
masking is not actually completed.

1. CPU 0 tries to mask an interrupt tried to CPU 1.
2. The timer is queued on CPU 1.
3. The call irq_mask returns on CPU 0
4. ...the irq is masked at some future point, determined by the callback
   at CPU 1

Is that the expected outcome?

There are .irq_mask implementation that does seem to go at length
(blocking) to perform the mask, e.g.: gic_mask_irq() which calls
gic_{re,}dist_wait_for_rwp that have sleep/retry loops. The GIC3 ITS
code has similar things going on.

I'm not saying you're wrong, I'm just trying to wrap my head around the
masking semantics.

> The AIA IMSIC spec allows setting ID pending bit using MSI write
> irrespective whether ID is enabled or not but the interrupt will be
> taken only after ID is enabled. In other words, there will be no
> loss of interrupt with delayed mask/unmask using async IPI or
> lazy timer.

No loss, but we might *get* an interrupt when we explicitly asked not to
get any. Maybe that's ok?


Björn
  
Anup Patel Feb. 21, 2024, 12:23 p.m. UTC | #8
On Wed, Feb 21, 2024 at 5:29 PM Björn Töpel <bjorn@kernelorg> wrote:
>
> Anup Patel <apatel@ventanamicro.com> writes:
>
> >> > +void imsic_vector_mask(struct imsic_vector *vec)
> >> > +{
> >> > +     struct imsic_local_priv *lpriv;
> >> > +
> >> > +     lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
> >> > +     if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
> >> > +             return;
> >> > +
> >> > +     /*
> >> > +      * This function is called through Linux irq subsystem with
> >> > +      * irqs disabled so no need to save/restore irq flags.
> >> > +      */
> >> > +
> >> > +     raw_spin_lock(&lpriv->lock);
> >> > +
> >> > +     vec->enable = false;
> >> > +     bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
> >> > +     __imsic_remote_sync(lpriv, vec->cpu);
> >> > +
> >> > +     raw_spin_unlock(&lpriv->lock);
> >> > +}
> >>
> >> Really nice that you're using a timer for the vector affinity change,
> >> and got rid of the special/weird IMSIC/sync IPI. Can you really use a
> >> timer for mask/unmask? That makes the mask/unmask operation
> >> asynchronous!
> >>
> >> That was what I was trying to get though with this comment:
> >> https://lore.kernel.org/linux-riscv/87sf24mo1g.fsf@all.your.base.are.belong.to.us/
> >>
> >> Also, using the smp_* IPI functions, you can pass arguments, so you
> >> don't need the dirty_bitmap tracking the changes.
> >
> > The mask/unmask operations are called with irqs disabled so if
> > CPU X does synchronous IPI to another CPU Y from mask/unmask
> > operation then while CPU X is waiting for IPI to complete it cannot
> > receive IPI from other CPUs which can lead to crashes and stalls.
> >
> > In general, we should not do any block/busy-wait work in
> > mask/unmask operation of an irqchip driver.
>
> Hmm, OK. Still, a bit odd that when the .irq_mask callback return, the
> masking is not actually completed.
>
> 1. CPU 0 tries to mask an interrupt tried to CPU 1.
> 2. The timer is queued on CPU 1.
> 3. The call irq_mask returns on CPU 0
> 4. ...the irq is masked at some future point, determined by the callback
>    at CPU 1
>
> Is that the expected outcome?

Yes, that's right.

>
> There are .irq_mask implementation that does seem to go at length
> (blocking) to perform the mask, e.g.: gic_mask_irq() which calls
> gic_{re,}dist_wait_for_rwp that have sleep/retry loops. The GIC3 ITS
> code has similar things going on.

The gic_{re,}dist_wait_for_rwp() polls on a HW register for completion
which will certainly complete in a predictable time whereas waiting
for IPI to be executed by another CPU is not predictable and fragile.

>
> I'm not saying you're wrong, I'm just trying to wrap my head around the
> masking semantics.
>
> > The AIA IMSIC spec allows setting ID pending bit using MSI write
> > irrespective whether ID is enabled or not but the interrupt will be
> > taken only after ID is enabled. In other words, there will be no
> > loss of interrupt with delayed mask/unmask using async IPI or
> > lazy timer.
>
> No loss, but we might *get* an interrupt when we explicitly asked not to
> get any. Maybe that's ok?
>

The delayed spurious interrupt after masking is avoided by additional
masking at the source of interrupt. For wired-to-MSI interrupts, we have
additional masking on the APLIC MSI-mode. For PCI MSI interrupts, we
have additional masking at PCI device level using pci_msi_mask_irq().

Regards,
Anup
  
Björn Töpel Feb. 21, 2024, 5:22 p.m. UTC | #9
Anup Patel <apatel@ventanamicro.com> writes:

> On Wed, Feb 21, 2024 at 5:29 PM Björn Töpel <bjorn@kernel.org> wrote:
>>
>> Anup Patel <apatel@ventanamicro.com> writes:
>>
>> >> > +void imsic_vector_mask(struct imsic_vector *vec)
>> >> > +{
>> >> > +     struct imsic_local_priv *lpriv;
>> >> > +
>> >> > +     lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
>> >> > +     if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
>> >> > +             return;
>> >> > +
>> >> > +     /*
>> >> > +      * This function is called through Linux irq subsystem with
>> >> > +      * irqs disabled so no need to save/restore irq flags.
>> >> > +      */
>> >> > +
>> >> > +     raw_spin_lock(&lpriv->lock);
>> >> > +
>> >> > +     vec->enable = false;
>> >> > +     bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
>> >> > +     __imsic_remote_sync(lpriv, vec->cpu);
>> >> > +
>> >> > +     raw_spin_unlock(&lpriv->lock);
>> >> > +}
>> >>
>> >> Really nice that you're using a timer for the vector affinity change,
>> >> and got rid of the special/weird IMSIC/sync IPI. Can you really use a
>> >> timer for mask/unmask? That makes the mask/unmask operation
>> >> asynchronous!
>> >>
>> >> That was what I was trying to get though with this comment:
>> >> https://lore.kernel.org/linux-riscv/87sf24mo1g.fsf@all.your.base.are.belong.to.us/
>> >>
>> >> Also, using the smp_* IPI functions, you can pass arguments, so you
>> >> don't need the dirty_bitmap tracking the changes.
>> >
>> > The mask/unmask operations are called with irqs disabled so if
>> > CPU X does synchronous IPI to another CPU Y from mask/unmask
>> > operation then while CPU X is waiting for IPI to complete it cannot
>> > receive IPI from other CPUs which can lead to crashes and stalls.
>> >
>> > In general, we should not do any block/busy-wait work in
>> > mask/unmask operation of an irqchip driver.
>>
>> Hmm, OK. Still, a bit odd that when the .irq_mask callback return, the
>> masking is not actually completed.
>>
>> 1. CPU 0 tries to mask an interrupt tried to CPU 1.
>> 2. The timer is queued on CPU 1.
>> 3. The call irq_mask returns on CPU 0
>> 4. ...the irq is masked at some future point, determined by the callback
>>    at CPU 1
>>
>> Is that the expected outcome?
>
> Yes, that's right.
>
>>
>> There are .irq_mask implementation that does seem to go at length
>> (blocking) to perform the mask, e.g.: gic_mask_irq() which calls
>> gic_{re,}dist_wait_for_rwp that have sleep/retry loops. The GIC3 ITS
>> code has similar things going on.
>
> The gic_{re,}dist_wait_for_rwp() polls on a HW register for completion
> which will certainly complete in a predictable time whereas waiting
> for IPI to be executed by another CPU is not predictable and fragile.
>
>>
>> I'm not saying you're wrong, I'm just trying to wrap my head around the
>> masking semantics.
>>
>> > The AIA IMSIC spec allows setting ID pending bit using MSI write
>> > irrespective whether ID is enabled or not but the interrupt will be
>> > taken only after ID is enabled. In other words, there will be no
>> > loss of interrupt with delayed mask/unmask using async IPI or
>> > lazy timer.
>>
>> No loss, but we might *get* an interrupt when we explicitly asked not to
>> get any. Maybe that's ok?
>>
>
> The delayed spurious interrupt after masking is avoided by additional
> masking at the source of interrupt. For wired-to-MSI interrupts, we have
> additional masking on the APLIC MSI-mode. For PCI MSI interrupts, we
> have additional masking at PCI device level using pci_msi_mask_irq().

Thanks for the clarifications, Anup! Much appreciated!
  

Patch

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index f7149d0f3d45..85f86e31c996 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -546,6 +546,13 @@  config SIFIVE_PLIC
 	select IRQ_DOMAIN_HIERARCHY
 	select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
 
+config RISCV_IMSIC
+	bool
+	depends on RISCV
+	select IRQ_DOMAIN_HIERARCHY
+	select GENERIC_IRQ_MATRIX_ALLOCATOR
+	select GENERIC_MSI_IRQ
+
 config EXYNOS_IRQ_COMBINER
 	bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST
 	depends on (ARCH_EXYNOS && ARM) || COMPILE_TEST
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index ffd945fe71aa..d714724387ce 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -95,6 +95,7 @@  obj-$(CONFIG_QCOM_MPM)			+= irq-qcom-mpm.o
 obj-$(CONFIG_CSKY_MPINTC)		+= irq-csky-mpintc.o
 obj-$(CONFIG_CSKY_APB_INTC)		+= irq-csky-apb-intc.o
 obj-$(CONFIG_RISCV_INTC)		+= irq-riscv-intc.o
+obj-$(CONFIG_RISCV_IMSIC)		+= irq-riscv-imsic-state.o irq-riscv-imsic-early.o
 obj-$(CONFIG_SIFIVE_PLIC)		+= irq-sifive-plic.o
 obj-$(CONFIG_IMX_IRQSTEER)		+= irq-imx-irqsteer.o
 obj-$(CONFIG_IMX_INTMUX)		+= irq-imx-intmux.o
diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c
new file mode 100644
index 000000000000..32fe428b1c19
--- /dev/null
+++ b/drivers/irqchip/irq-riscv-imsic-early.c
@@ -0,0 +1,213 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+
+#define pr_fmt(fmt) "riscv-imsic: " fmt
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+
+#include "irq-riscv-imsic-state.h"
+
+static int imsic_parent_irq;
+
+#ifdef CONFIG_SMP
+static void imsic_ipi_send(unsigned int cpu)
+{
+	struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu);
+
+	writel_relaxed(IMSIC_IPI_ID, local->msi_va);
+}
+
+static void imsic_ipi_starting_cpu(void)
+{
+	/* Enable IPIs for current CPU. */
+	__imsic_id_set_enable(IMSIC_IPI_ID);
+}
+
+static void imsic_ipi_dying_cpu(void)
+{
+	/* Disable IPIs for current CPU. */
+	__imsic_id_clear_enable(IMSIC_IPI_ID);
+}
+
+static int __init imsic_ipi_domain_init(void)
+{
+	int virq;
+
+	/* Create IMSIC IPI multiplexing */
+	virq = ipi_mux_create(IMSIC_NR_IPI, imsic_ipi_send);
+	if (virq <= 0)
+		return (virq < 0) ? virq : -ENOMEM;
+
+	/* Set vIRQ range */
+	riscv_ipi_set_virq_range(virq, IMSIC_NR_IPI, true);
+
+	/* Announce that IMSIC is providing IPIs */
+	pr_info("%pfwP: providing IPIs using interrupt %d\n", imsic->fwnode, IMSIC_IPI_ID);
+
+	return 0;
+}
+#else
+static void imsic_ipi_starting_cpu(void)
+{
+}
+
+static void imsic_ipi_dying_cpu(void)
+{
+}
+
+static int __init imsic_ipi_domain_init(void)
+{
+	return 0;
+}
+#endif
+
+/*
+ * To handle an interrupt, we read the TOPEI CSR and write zero in one
+ * instruction. If TOPEI CSR is non-zero then we translate TOPEI.ID to
+ * Linux interrupt number and let Linux IRQ subsystem handle it.
+ */
+static void imsic_handle_irq(struct irq_desc *desc)
+{
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	int err, cpu = smp_processor_id();
+	struct imsic_vector *vec;
+	unsigned long local_id;
+
+	chained_irq_enter(chip, desc);
+
+	while ((local_id = csr_swap(CSR_TOPEI, 0))) {
+		local_id = local_id >> TOPEI_ID_SHIFT;
+
+		if (local_id == IMSIC_IPI_ID) {
+#ifdef CONFIG_SMP
+			ipi_mux_process();
+#endif
+			continue;
+		}
+
+		if (unlikely(!imsic->base_domain))
+			continue;
+
+		vec = imsic_vector_from_local_id(cpu, local_id);
+		if (!vec) {
+			pr_warn_ratelimited("vector not found for local ID 0x%lx\n", local_id);
+			continue;
+		}
+
+		err = generic_handle_domain_irq(imsic->base_domain,
+						vec->hwirq);
+		if (unlikely(err))
+			pr_warn_ratelimited("hwirq 0x%x mapping not found\n", vec->hwirq);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int imsic_starting_cpu(unsigned int cpu)
+{
+	/* Mark per-CPU IMSIC state as online */
+	imsic_state_online();
+
+	/* Enable per-CPU parent interrupt */
+	enable_percpu_irq(imsic_parent_irq, irq_get_trigger_type(imsic_parent_irq));
+
+	/* Setup IPIs */
+	imsic_ipi_starting_cpu();
+
+	/*
+	 * Interrupts identities might have been enabled/disabled while
+	 * this CPU was not running so sync-up local enable/disable state.
+	 */
+	imsic_local_sync_all();
+
+	/* Enable local interrupt delivery */
+	imsic_local_delivery(true);
+
+	return 0;
+}
+
+static int imsic_dying_cpu(unsigned int cpu)
+{
+	/* Cleanup IPIs */
+	imsic_ipi_dying_cpu();
+
+	/* Mark per-CPU IMSIC state as offline */
+	imsic_state_offline();
+
+	return 0;
+}
+
+static int __init imsic_early_probe(struct fwnode_handle *fwnode)
+{
+	struct irq_domain *domain;
+	int rc;
+
+	/* Find parent domain and register chained handler */
+	domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY);
+	if (!domain) {
+		pr_err("%pfwP: Failed to find INTC domain\n", fwnode);
+		return -ENOENT;
+	}
+	imsic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT);
+	if (!imsic_parent_irq) {
+		pr_err("%pfwP: Failed to create INTC mapping\n", fwnode);
+		return -ENOENT;
+	}
+
+	/* Initialize IPI domain */
+	rc = imsic_ipi_domain_init();
+	if (rc) {
+		pr_err("%pfwP: Failed to initialize IPI domain\n", fwnode);
+		return rc;
+	}
+
+	/* Setup chained handler to the parent domain interrupt */
+	irq_set_chained_handler(imsic_parent_irq, imsic_handle_irq);
+
+	/*
+	 * Setup cpuhp state (must be done after setting imsic_parent_irq)
+	 *
+	 * Don't disable per-CPU IMSIC file when CPU goes offline
+	 * because this affects IPI and the masking/unmasking of
+	 * virtual IPIs is done via generic IPI-Mux
+	 */
+	cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "irqchip/riscv/imsic:starting",
+			  imsic_starting_cpu, imsic_dying_cpu);
+
+	return 0;
+}
+
+static int __init imsic_early_dt_init(struct device_node *node,
+				      struct device_node *parent)
+{
+	struct fwnode_handle *fwnode = &node->fwnode;
+	int rc;
+
+	/* Setup IMSIC state */
+	rc = imsic_setup_state(fwnode);
+	if (rc) {
+		pr_err("%pfwP: failed to setup state (error %d)\n",
+			fwnode, rc);
+		return rc;
+	}
+
+	/* Do early setup of IPIs */
+	rc = imsic_early_probe(fwnode);
+	if (rc)
+		return rc;
+
+	/* Ensure that OF platform device gets probed */
+	of_node_clear_flag(node, OF_POPULATED);
+	return 0;
+}
+IRQCHIP_DECLARE(riscv_imsic, "riscv,imsics", imsic_early_dt_init);
diff --git a/drivers/irqchip/irq-riscv-imsic-state.c b/drivers/irqchip/irq-riscv-imsic-state.c
new file mode 100644
index 000000000000..4f347486ec7c
--- /dev/null
+++ b/drivers/irqchip/irq-riscv-imsic-state.c
@@ -0,0 +1,906 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+
+#define pr_fmt(fmt) "riscv-imsic: " fmt
+#include <linux/cpu.h>
+#include <linux/bitmap.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <asm/hwcap.h>
+
+#include "irq-riscv-imsic-state.h"
+
+#define IMSIC_DISABLE_EIDELIVERY		0
+#define IMSIC_ENABLE_EIDELIVERY			1
+#define IMSIC_DISABLE_EITHRESHOLD		1
+#define IMSIC_ENABLE_EITHRESHOLD		0
+
+static inline void imsic_csr_write(unsigned long reg, unsigned long val)
+{
+	csr_write(CSR_ISELECT, reg);
+	csr_write(CSR_IREG, val);
+}
+
+static inline unsigned long imsic_csr_read(unsigned long reg)
+{
+	csr_write(CSR_ISELECT, reg);
+	return csr_read(CSR_IREG);
+}
+
+static inline unsigned long imsic_csr_read_clear(unsigned long reg, unsigned long val)
+{
+	csr_write(CSR_ISELECT, reg);
+	return csr_read_clear(CSR_IREG, val);
+}
+
+static inline void imsic_csr_set(unsigned long reg, unsigned long val)
+{
+	csr_write(CSR_ISELECT, reg);
+	csr_set(CSR_IREG, val);
+}
+
+static inline void imsic_csr_clear(unsigned long reg, unsigned long val)
+{
+	csr_write(CSR_ISELECT, reg);
+	csr_clear(CSR_IREG, val);
+}
+
+struct imsic_priv *imsic;
+
+const struct imsic_global_config *imsic_get_global_config(void)
+{
+	return imsic ? &imsic->global : NULL;
+}
+EXPORT_SYMBOL_GPL(imsic_get_global_config);
+
+static bool __imsic_eix_read_clear(unsigned long id, bool pend)
+{
+	unsigned long isel, imask;
+
+	isel = id / BITS_PER_LONG;
+	isel *= BITS_PER_LONG / IMSIC_EIPx_BITS;
+	isel += pend ? IMSIC_EIP0 : IMSIC_EIE0;
+	imask = BIT(id & (__riscv_xlen - 1));
+
+	return (imsic_csr_read_clear(isel, imask) & imask) ? true : false;
+}
+
+static inline bool __imsic_id_read_clear_enabled(unsigned long id)
+{
+	return __imsic_eix_read_clear(id, false);
+}
+
+static inline bool __imsic_id_read_clear_pending(unsigned long id)
+{
+	return __imsic_eix_read_clear(id, true);
+}
+
+void __imsic_eix_update(unsigned long base_id, unsigned long num_id, bool pend, bool val)
+{
+	unsigned long id = base_id, last_id = base_id + num_id;
+	unsigned long i, isel, ireg;
+
+	while (id < last_id) {
+		isel = id / BITS_PER_LONG;
+		isel *= BITS_PER_LONG / IMSIC_EIPx_BITS;
+		isel += (pend) ? IMSIC_EIP0 : IMSIC_EIE0;
+
+		/*
+		 * Prepare the ID mask to be programmed in the
+		 * IMSIC EIEx and EIPx registers. These registers
+		 * are XLEN-wide and we must not touch IDs which
+		 * are < base_id and >= (base_id + num_id).
+		 */
+		ireg = 0;
+		for (i = id & (__riscv_xlen - 1); (id < last_id) && (i < __riscv_xlen); i++) {
+			ireg |= BIT(i);
+			id++;
+		}
+
+		/*
+		 * The IMSIC EIEx and EIPx registers are indirectly
+		 * accessed via using ISELECT and IREG CSRs so we
+		 * need to access these CSRs without getting preempted.
+		 *
+		 * All existing users of this function call this
+		 * function with local IRQs disabled so we don't
+		 * need to do anything special here.
+		 */
+		if (val)
+			imsic_csr_set(isel, ireg);
+		else
+			imsic_csr_clear(isel, ireg);
+	}
+}
+
+/* MUST be called with lpriv->lock held */
+static void __imsic_local_sync(struct imsic_local_priv *lpriv)
+{
+	struct imsic_local_config *mlocal;
+	struct imsic_vector *vec, *mvec;
+	int i;
+
+	/* This pairs with the barrier in __imsic_remote_sync(). */
+	smp_mb();
+
+	for_each_set_bit(i, lpriv->dirty_bitmap, imsic->global.nr_ids + 1) {
+		if (!i || i == IMSIC_IPI_ID)
+			goto skip;
+		vec = &lpriv->vectors[i];
+
+		if (vec->enable)
+			__imsic_id_set_enable(i);
+		else
+			__imsic_id_clear_enable(i);
+
+		/*
+		 * If the ID was being moved to a new ID on some other CPU
+		 * then we can get a MSI during the movement so check the
+		 * ID pending bit and re-trigger the new ID on other CPU
+		 * using MMIO write.
+		 */
+		mvec = vec->move;
+		vec->move = NULL;
+		if (mvec && mvec != vec) {
+			if (__imsic_id_read_clear_pending(i)) {
+				mlocal = per_cpu_ptr(imsic->global.local, mvec->cpu);
+				writel_relaxed(mvec->local_id, mlocal->msi_va);
+			}
+
+			imsic_vector_free(&lpriv->vectors[i]);
+		}
+
+skip:
+		bitmap_clear(lpriv->dirty_bitmap, i, 1);
+	}
+}
+
+void imsic_local_sync_all(void)
+{
+	struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&lpriv->lock, flags);
+	bitmap_fill(lpriv->dirty_bitmap, imsic->global.nr_ids + 1);
+	__imsic_local_sync(lpriv);
+	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
+}
+
+void imsic_local_delivery(bool enable)
+{
+	if (enable) {
+		imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_ENABLE_EITHRESHOLD);
+		imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_ENABLE_EIDELIVERY);
+		return;
+	}
+
+	imsic_csr_write(IMSIC_EIDELIVERY, IMSIC_DISABLE_EIDELIVERY);
+	imsic_csr_write(IMSIC_EITHRESHOLD, IMSIC_DISABLE_EITHRESHOLD);
+}
+
+#ifdef CONFIG_SMP
+static void imsic_local_timer_callback(struct timer_list *timer)
+{
+	struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&lpriv->lock, flags);
+	__imsic_local_sync(lpriv);
+	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
+}
+
+/* MUST be called with lpriv->lock held */
+static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)
+{
+	/*
+	 * Ensure that changes to vector enable, vector move and
+	 * dirty bitmap are visible to the target CPU.
+	 *
+	 * This pairs with the barrier in __imsic_local_sync().
+	 */
+	smp_mb();
+
+	/*
+	 * We schedule a timer on the target CPU if the target CPU is not
+	 * same as the current CPU. An offline CPU will unconditionally
+	 * synchronize IDs through imsic_starting_cpu() when the
+	 * CPU is brought up.
+	 */
+	if (cpu_online(cpu)) {
+		if (cpu != smp_processor_id()) {
+			if (!timer_pending(&lpriv->timer)) {
+				lpriv->timer.expires = jiffies + 1;
+				add_timer_on(&lpriv->timer, cpu);
+			}
+		} else {
+			__imsic_local_sync(lpriv);
+		}
+	}
+}
+#else
+/* MUST be called with lpriv->lock held */
+static void __imsic_remote_sync(struct imsic_local_priv *lpriv, unsigned int cpu)
+{
+	__imsic_local_sync(lpriv);
+}
+#endif
+
+void imsic_vector_mask(struct imsic_vector *vec)
+{
+	struct imsic_local_priv *lpriv;
+
+	lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
+	if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
+		return;
+
+	/*
+	 * This function is called through Linux irq subsystem with
+	 * irqs disabled so no need to save/restore irq flags.
+	 */
+
+	raw_spin_lock(&lpriv->lock);
+
+	vec->enable = false;
+	bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
+	__imsic_remote_sync(lpriv, vec->cpu);
+
+	raw_spin_unlock(&lpriv->lock);
+}
+
+void imsic_vector_unmask(struct imsic_vector *vec)
+{
+	struct imsic_local_priv *lpriv;
+
+	lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
+	if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
+		return;
+
+	/*
+	 * This function is called through Linux irq subsystem with
+	 * irqs disabled so no need to save/restore irq flags.
+	 */
+
+	raw_spin_lock(&lpriv->lock);
+
+	vec->enable = true;
+	bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
+	__imsic_remote_sync(lpriv, vec->cpu);
+
+	raw_spin_unlock(&lpriv->lock);
+}
+
+
+bool imsic_vector_isenabled(struct imsic_vector *vec)
+{
+	struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
+	unsigned long flags;
+	bool ret;
+
+	raw_spin_lock_irqsave(&lpriv->lock, flags);
+	ret = vec->enable;
+	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
+
+	return ret;
+}
+
+struct imsic_vector *imsic_vector_get_move(struct imsic_vector *vec)
+{
+	struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
+	struct imsic_vector *ret;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&lpriv->lock, flags);
+	ret = vec->move;
+	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
+
+	return ret;
+}
+
+static bool imsic_vector_move_update(struct imsic_local_priv *lpriv, struct imsic_vector *vec,
+				     bool new_enable, struct imsic_vector *new_move)
+{
+	unsigned long flags;
+	bool enabled;
+
+	raw_spin_lock_irqsave(&lpriv->lock, flags);
+
+	/* Update enable and move details */
+	enabled = vec->enable;
+	vec->enable = new_enable;
+	vec->move = new_move;
+
+	/* Mark the vector as dirty and synchronize */
+	bitmap_set(lpriv->dirty_bitmap, vec->local_id, 1);
+	__imsic_remote_sync(lpriv, vec->cpu);
+
+	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
+
+	return enabled;
+}
+
+void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_vec)
+{
+	struct imsic_local_priv *old_lpriv, *new_lpriv;
+	bool enabled;
+
+	if (WARN_ON(old_vec->cpu == new_vec->cpu))
+		return;
+
+	old_lpriv = per_cpu_ptr(imsic->lpriv, old_vec->cpu);
+	if (WARN_ON(&old_lpriv->vectors[old_vec->local_id] != old_vec))
+		return;
+
+	new_lpriv = per_cpu_ptr(imsic->lpriv, new_vec->cpu);
+	if (WARN_ON(&new_lpriv->vectors[new_vec->local_id] != new_vec))
+		return;
+
+	/*
+	 * Move and re-trigger the new vector based on the pending
+	 * state of the old vector because we might get a device
+	 * interrupt on the old vector while device was being moved
+	 * to the new vector.
+	 */
+	enabled = imsic_vector_move_update(old_lpriv, old_vec, false, new_vec);
+	imsic_vector_move_update(new_lpriv, new_vec, enabled, new_vec);
+}
+
+#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
+void imsic_vector_debug_show(struct seq_file *m, struct imsic_vector *vec, int ind)
+{
+	struct imsic_local_priv *lpriv;
+	struct imsic_vector *mvec;
+	bool is_enabled;
+
+	lpriv = per_cpu_ptr(imsic->lpriv, vec->cpu);
+	if (WARN_ON(&lpriv->vectors[vec->local_id] != vec))
+		return;
+
+	is_enabled = imsic_vector_isenabled(vec);
+	mvec = imsic_vector_get_move(vec);
+
+	seq_printf(m, "%*starget_cpu      : %5u\n", ind, "", vec->cpu);
+	seq_printf(m, "%*starget_local_id : %5u\n", ind, "", vec->local_id);
+	seq_printf(m, "%*sis_reserved     : %5u\n", ind, "",
+		   (vec->local_id <= IMSIC_IPI_ID) ? 1 : 0);
+	seq_printf(m, "%*sis_enabled      : %5u\n", ind, "", (is_enabled) ? 1 : 0);
+	seq_printf(m, "%*sis_move_pending : %5u\n", ind, "", (mvec) ? 1 : 0);
+	if (mvec) {
+		seq_printf(m, "%*smove_cpu        : %5u\n", ind, "", mvec->cpu);
+		seq_printf(m, "%*smove_local_id   : %5u\n", ind, "", mvec->local_id);
+	}
+}
+
+void imsic_vector_debug_show_summary(struct seq_file *m, int ind)
+{
+	irq_matrix_debug_show(m, imsic->matrix, ind);
+}
+#endif
+
+struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int local_id)
+{
+	struct imsic_local_priv *lpriv = per_cpu_ptr(imsic->lpriv, cpu);
+
+	if (!lpriv || imsic->global.nr_ids < local_id)
+		return NULL;
+
+	return &lpriv->vectors[local_id];
+}
+
+struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask *mask)
+{
+	struct imsic_vector *vec = NULL;
+	struct imsic_local_priv *lpriv;
+	unsigned long flags;
+	unsigned int cpu;
+	int local_id;
+
+	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
+	local_id = irq_matrix_alloc(imsic->matrix, mask, false, &cpu);
+	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
+	if (local_id < 0)
+		return NULL;
+
+	lpriv = per_cpu_ptr(imsic->lpriv, cpu);
+	vec = &lpriv->vectors[local_id];
+	vec->hwirq = hwirq;
+	vec->enable = false;
+	vec->move = NULL;
+
+	return vec;
+}
+
+void imsic_vector_free(struct imsic_vector *vec)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
+	vec->hwirq = UINT_MAX;
+	irq_matrix_free(imsic->matrix, vec->cpu, vec->local_id, false);
+	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
+}
+
+static void __init imsic_local_cleanup(void)
+{
+	int cpu;
+	struct imsic_local_priv *lpriv;
+
+	for_each_possible_cpu(cpu) {
+		lpriv = per_cpu_ptr(imsic->lpriv, cpu);
+
+		bitmap_free(lpriv->dirty_bitmap);
+		kfree(lpriv->vectors);
+	}
+
+	free_percpu(imsic->lpriv);
+}
+
+static int __init imsic_local_init(void)
+{
+	struct imsic_global_config *global = &imsic->global;
+	struct imsic_local_priv *lpriv;
+	struct imsic_vector *vec;
+	int cpu, i;
+
+	/* Allocate per-CPU private state */
+	imsic->lpriv = alloc_percpu(typeof(*(imsic->lpriv)));
+	if (!imsic->lpriv)
+		return -ENOMEM;
+
+	/* Setup per-CPU private state */
+	for_each_possible_cpu(cpu) {
+		lpriv = per_cpu_ptr(imsic->lpriv, cpu);
+
+		raw_spin_lock_init(&lpriv->lock);
+
+		/* Allocate dirty bitmap */
+		lpriv->dirty_bitmap = bitmap_zalloc(global->nr_ids + 1, GFP_KERNEL);
+		if (!lpriv->dirty_bitmap)
+			goto fail_local_cleanup;
+
+#ifdef CONFIG_SMP
+		/* Setup lazy timer for synchronization */
+		timer_setup(&lpriv->timer, imsic_local_timer_callback, TIMER_PINNED);
+#endif
+
+		/* Allocate vector array */
+		lpriv->vectors = kcalloc(global->nr_ids + 1, sizeof(*lpriv->vectors),
+					 GFP_KERNEL);
+		if (!lpriv->vectors)
+			goto fail_local_cleanup;
+
+		/* Setup vector array */
+		for (i = 0; i <= global->nr_ids; i++) {
+			vec = &lpriv->vectors[i];
+			vec->cpu = cpu;
+			vec->local_id = i;
+			vec->hwirq = UINT_MAX;
+		}
+	}
+
+	return 0;
+
+fail_local_cleanup:
+	imsic_local_cleanup();
+	return -ENOMEM;
+}
+
+void imsic_state_online(void)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
+	irq_matrix_online(imsic->matrix);
+	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
+}
+
+void imsic_state_offline(void)
+{
+#ifdef CONFIG_SMP
+	struct imsic_local_priv *lpriv = this_cpu_ptr(imsic->lpriv);
+#endif
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&imsic->matrix_lock, flags);
+	irq_matrix_offline(imsic->matrix);
+	raw_spin_unlock_irqrestore(&imsic->matrix_lock, flags);
+
+#ifdef CONFIG_SMP
+	raw_spin_lock_irqsave(&lpriv->lock, flags);
+	WARN_ON_ONCE(try_to_del_timer_sync(&lpriv->timer) < 0);
+	raw_spin_unlock_irqrestore(&lpriv->lock, flags);
+#endif
+}
+
+static int __init imsic_matrix_init(void)
+{
+	struct imsic_global_config *global = &imsic->global;
+
+	raw_spin_lock_init(&imsic->matrix_lock);
+	imsic->matrix = irq_alloc_matrix(global->nr_ids + 1,
+					 0, global->nr_ids + 1);
+	if (!imsic->matrix)
+		return -ENOMEM;
+
+	/* Reserve ID#0 because it is special and never implemented */
+	irq_matrix_assign_system(imsic->matrix, 0, false);
+
+	/* Reserve IPI ID because it is special and used internally */
+	irq_matrix_assign_system(imsic->matrix, IMSIC_IPI_ID, false);
+
+	return 0;
+}
+
+static int __init imsic_get_parent_hartid(struct fwnode_handle *fwnode,
+					  u32 index, unsigned long *hartid)
+{
+	struct of_phandle_args parent;
+	int rc;
+
+	/*
+	 * Currently, only OF fwnode is supported so extend this
+	 * function for ACPI support.
+	 */
+	if (!is_of_node(fwnode))
+		return -EINVAL;
+
+	rc = of_irq_parse_one(to_of_node(fwnode), index, &parent);
+	if (rc)
+		return rc;
+
+	/*
+	 * Skip interrupts other than external interrupts for
+	 * current privilege level.
+	 */
+	if (parent.args[0] != RV_IRQ_EXT)
+		return -EINVAL;
+
+	return riscv_of_parent_hartid(parent.np, hartid);
+}
+
+static int __init imsic_get_mmio_resource(struct fwnode_handle *fwnode,
+					  u32 index, struct resource *res)
+{
+	/*
+	 * Currently, only OF fwnode is supported so extend this
+	 * function for ACPI support.
+	 */
+	if (!is_of_node(fwnode))
+		return -EINVAL;
+
+	return of_address_to_resource(to_of_node(fwnode), index, res);
+}
+
+static int __init imsic_parse_fwnode(struct fwnode_handle *fwnode,
+				     struct imsic_global_config *global,
+				     u32 *nr_parent_irqs,
+				     u32 *nr_mmios)
+{
+	unsigned long hartid;
+	struct resource res;
+	int rc;
+	u32 i;
+
+	/*
+	 * Currently, only OF fwnode is supported so extend this
+	 * function for ACPI support.
+	 */
+	if (!is_of_node(fwnode))
+		return -EINVAL;
+
+	*nr_parent_irqs = 0;
+	*nr_mmios = 0;
+
+	/* Find number of parent interrupts */
+	while (!imsic_get_parent_hartid(fwnode, *nr_parent_irqs, &hartid))
+		(*nr_parent_irqs)++;
+	if (!(*nr_parent_irqs)) {
+		pr_err("%pfwP: no parent irqs available\n", fwnode);
+		return -EINVAL;
+	}
+
+	/* Find number of guest index bits in MSI address */
+	rc = of_property_read_u32(to_of_node(fwnode), "riscv,guest-index-bits",
+				  &global->guest_index_bits);
+	if (rc)
+		global->guest_index_bits = 0;
+
+	/* Find number of HART index bits */
+	rc = of_property_read_u32(to_of_node(fwnode), "riscv,hart-index-bits",
+				  &global->hart_index_bits);
+	if (rc) {
+		/* Assume default value */
+		global->hart_index_bits = __fls(*nr_parent_irqs);
+		if (BIT(global->hart_index_bits) < *nr_parent_irqs)
+			global->hart_index_bits++;
+	}
+
+	/* Find number of group index bits */
+	rc = of_property_read_u32(to_of_node(fwnode), "riscv,group-index-bits",
+				  &global->group_index_bits);
+	if (rc)
+		global->group_index_bits = 0;
+
+	/*
+	 * Find first bit position of group index.
+	 * If not specified assumed the default APLIC-IMSIC configuration.
+	 */
+	rc = of_property_read_u32(to_of_node(fwnode), "riscv,group-index-shift",
+				  &global->group_index_shift);
+	if (rc)
+		global->group_index_shift = IMSIC_MMIO_PAGE_SHIFT * 2;
+
+	/* Find number of interrupt identities */
+	rc = of_property_read_u32(to_of_node(fwnode), "riscv,num-ids",
+				  &global->nr_ids);
+	if (rc) {
+		pr_err("%pfwP: number of interrupt identities not found\n",
+			fwnode);
+		return rc;
+	}
+
+	/* Find number of guest interrupt identities */
+	rc = of_property_read_u32(to_of_node(fwnode), "riscv,num-guest-ids",
+				  &global->nr_guest_ids);
+	if (rc)
+		global->nr_guest_ids = global->nr_ids;
+
+	/* Sanity check guest index bits */
+	i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT;
+	if (i < global->guest_index_bits) {
+		pr_err("%pfwP: guest index bits too big\n", fwnode);
+		return -EINVAL;
+	}
+
+	/* Sanity check HART index bits */
+	i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT - global->guest_index_bits;
+	if (i < global->hart_index_bits) {
+		pr_err("%pfwP: HART index bits too big\n", fwnode);
+		return -EINVAL;
+	}
+
+	/* Sanity check group index bits */
+	i = BITS_PER_LONG - IMSIC_MMIO_PAGE_SHIFT -
+	    global->guest_index_bits - global->hart_index_bits;
+	if (i < global->group_index_bits) {
+		pr_err("%pfwP: group index bits too big\n", fwnode);
+		return -EINVAL;
+	}
+
+	/* Sanity check group index shift */
+	i = global->group_index_bits + global->group_index_shift - 1;
+	if (i >= BITS_PER_LONG) {
+		pr_err("%pfwP: group index shift too big\n", fwnode);
+		return -EINVAL;
+	}
+
+	/* Sanity check number of interrupt identities */
+	if ((global->nr_ids < IMSIC_MIN_ID) ||
+	    (global->nr_ids >= IMSIC_MAX_ID) ||
+	    ((global->nr_ids & IMSIC_MIN_ID) != IMSIC_MIN_ID)) {
+		pr_err("%pfwP: invalid number of interrupt identities\n",
+			fwnode);
+		return -EINVAL;
+	}
+
+	/* Sanity check number of guest interrupt identities */
+	if ((global->nr_guest_ids < IMSIC_MIN_ID) ||
+	    (global->nr_guest_ids >= IMSIC_MAX_ID) ||
+	    ((global->nr_guest_ids & IMSIC_MIN_ID) != IMSIC_MIN_ID)) {
+		pr_err("%pfwP: invalid number of guest interrupt identities\n",
+			fwnode);
+		return -EINVAL;
+	}
+
+	/* Compute base address */
+	rc = imsic_get_mmio_resource(fwnode, 0, &res);
+	if (rc) {
+		pr_err("%pfwP: first MMIO resource not found\n", fwnode);
+		return -EINVAL;
+	}
+	global->base_addr = res.start;
+	global->base_addr &= ~(BIT(global->guest_index_bits +
+				   global->hart_index_bits +
+				   IMSIC_MMIO_PAGE_SHIFT) - 1);
+	global->base_addr &= ~((BIT(global->group_index_bits) - 1) <<
+			       global->group_index_shift);
+
+	/* Find number of MMIO register sets */
+	while (!imsic_get_mmio_resource(fwnode, *nr_mmios, &res))
+		(*nr_mmios)++;
+
+	return 0;
+}
+
+int __init imsic_setup_state(struct fwnode_handle *fwnode)
+{
+	u32 i, j, index, nr_parent_irqs, nr_mmios, nr_handlers = 0;
+	struct imsic_global_config *global;
+	struct imsic_local_config *local;
+	void __iomem **mmios_va = NULL;
+	struct resource *mmios = NULL;
+	unsigned long reloff, hartid;
+	phys_addr_t base_addr;
+	int rc, cpu;
+
+	/*
+	 * Only one IMSIC instance allowed in a platform for clean
+	 * implementation of SMP IRQ affinity and per-CPU IPIs.
+	 *
+	 * This means on a multi-socket (or multi-die) platform we
+	 * will have multiple MMIO regions for one IMSIC instance.
+	 */
+	if (imsic) {
+		pr_err("%pfwP: already initialized hence ignoring\n",
+			fwnode);
+		return -EALREADY;
+	}
+
+	if (!riscv_isa_extension_available(NULL, SxAIA)) {
+		pr_err("%pfwP: AIA support not available\n", fwnode);
+		return -ENODEV;
+	}
+
+	imsic = kzalloc(sizeof(*imsic), GFP_KERNEL);
+	if (!imsic)
+		return -ENOMEM;
+	imsic->fwnode = fwnode;
+	global = &imsic->global;
+
+	global->local = alloc_percpu(typeof(*(global->local)));
+	if (!global->local) {
+		rc = -ENOMEM;
+		goto out_free_priv;
+	}
+
+	/* Parse IMSIC fwnode */
+	rc = imsic_parse_fwnode(fwnode, global, &nr_parent_irqs, &nr_mmios);
+	if (rc)
+		goto out_free_local;
+
+	/* Allocate MMIO resource array */
+	mmios = kcalloc(nr_mmios, sizeof(*mmios), GFP_KERNEL);
+	if (!mmios) {
+		rc = -ENOMEM;
+		goto out_free_local;
+	}
+
+	/* Allocate MMIO virtual address array */
+	mmios_va = kcalloc(nr_mmios, sizeof(*mmios_va), GFP_KERNEL);
+	if (!mmios_va) {
+		rc = -ENOMEM;
+		goto out_iounmap;
+	}
+
+	/* Parse and map MMIO register sets */
+	for (i = 0; i < nr_mmios; i++) {
+		rc = imsic_get_mmio_resource(fwnode, i, &mmios[i]);
+		if (rc) {
+			pr_err("%pfwP: unable to parse MMIO regset %d\n",
+				fwnode, i);
+			goto out_iounmap;
+		}
+
+		base_addr = mmios[i].start;
+		base_addr &= ~(BIT(global->guest_index_bits +
+				   global->hart_index_bits +
+				   IMSIC_MMIO_PAGE_SHIFT) - 1);
+		base_addr &= ~((BIT(global->group_index_bits) - 1) <<
+			       global->group_index_shift);
+		if (base_addr != global->base_addr) {
+			rc = -EINVAL;
+			pr_err("%pfwP: address mismatch for regset %d\n",
+				fwnode, i);
+			goto out_iounmap;
+		}
+
+		mmios_va[i] = ioremap(mmios[i].start, resource_size(&mmios[i]));
+		if (!mmios_va[i]) {
+			rc = -EIO;
+			pr_err("%pfwP: unable to map MMIO regset %d\n",
+				fwnode, i);
+			goto out_iounmap;
+		}
+	}
+
+	/* Initialize local (or per-CPU )state */
+	rc = imsic_local_init();
+	if (rc) {
+		pr_err("%pfwP: failed to initialize local state\n",
+		       fwnode);
+		goto out_iounmap;
+	}
+
+	/* Configure handlers for target CPUs */
+	for (i = 0; i < nr_parent_irqs; i++) {
+		rc = imsic_get_parent_hartid(fwnode, i, &hartid);
+		if (rc) {
+			pr_warn("%pfwP: hart ID for parent irq%d not found\n",
+				fwnode, i);
+			continue;
+		}
+
+		cpu = riscv_hartid_to_cpuid(hartid);
+		if (cpu < 0) {
+			pr_warn("%pfwP: invalid cpuid for parent irq%d\n",
+				fwnode, i);
+			continue;
+		}
+
+		/* Find MMIO location of MSI page */
+		index = nr_mmios;
+		reloff = i * BIT(global->guest_index_bits) *
+			 IMSIC_MMIO_PAGE_SZ;
+		for (j = 0; nr_mmios; j++) {
+			if (reloff < resource_size(&mmios[j])) {
+				index = j;
+				break;
+			}
+
+			/*
+			 * MMIO region size may not be aligned to
+			 * BIT(global->guest_index_bits) * IMSIC_MMIO_PAGE_SZ
+			 * if holes are present.
+			 */
+			reloff -= ALIGN(resource_size(&mmios[j]),
+			BIT(global->guest_index_bits) * IMSIC_MMIO_PAGE_SZ);
+		}
+		if (index >= nr_mmios) {
+			pr_warn("%pfwP: MMIO not found for parent irq%d\n",
+				fwnode, i);
+			continue;
+		}
+
+		local = per_cpu_ptr(global->local, cpu);
+		local->msi_pa = mmios[index].start + reloff;
+		local->msi_va = mmios_va[index] + reloff;
+
+		nr_handlers++;
+	}
+
+	/* If no CPU handlers found then can't take interrupts */
+	if (!nr_handlers) {
+		pr_err("%pfwP: No CPU handlers found\n", fwnode);
+		rc = -ENODEV;
+		goto out_local_cleanup;
+	}
+
+	/* Initialize matrix allocator */
+	rc = imsic_matrix_init();
+	if (rc) {
+		pr_err("%pfwP: failed to create matrix allocator\n",
+		       fwnode);
+		goto out_local_cleanup;
+	}
+
+	/* We don't need MMIO arrays anymore so let's free-up */
+	kfree(mmios_va);
+	kfree(mmios);
+
+	return 0;
+
+out_local_cleanup:
+	imsic_local_cleanup();
+out_iounmap:
+	for (i = 0; i < nr_mmios; i++) {
+		if (mmios_va[i])
+			iounmap(mmios_va[i]);
+	}
+	kfree(mmios_va);
+	kfree(mmios);
+out_free_local:
+	free_percpu(imsic->global.local);
+out_free_priv:
+	kfree(imsic);
+	imsic = NULL;
+	return rc;
+}
diff --git a/drivers/irqchip/irq-riscv-imsic-state.h b/drivers/irqchip/irq-riscv-imsic-state.h
new file mode 100644
index 000000000000..f0c983db99eb
--- /dev/null
+++ b/drivers/irqchip/irq-riscv-imsic-state.h
@@ -0,0 +1,98 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+
+#ifndef _IRQ_RISCV_IMSIC_STATE_H
+#define _IRQ_RISCV_IMSIC_STATE_H
+
+#include <linux/irqchip/riscv-imsic.h>
+#include <linux/irqdomain.h>
+#include <linux/fwnode.h>
+#include <linux/timer.h>
+
+#define IMSIC_IPI_ID				1
+#define IMSIC_NR_IPI				8
+
+struct imsic_vector {
+	/* Fixed details of the vector */
+	unsigned int				cpu;
+	unsigned int				local_id;
+	/* Details saved by driver in the vector */
+	unsigned int				hwirq;
+	/* Details accessed using local lock held */
+	bool					enable;
+	struct imsic_vector			*move;
+};
+
+struct imsic_local_priv {
+	/* Local lock to protect vector enable/move variables and dirty bitmap */
+	raw_spinlock_t				lock;
+
+	/* Local dirty bitmap for synchronization */
+	unsigned long				*dirty_bitmap;
+
+#ifdef CONFIG_SMP
+	/* Local timer for synchronization */
+	struct timer_list			timer;
+#endif
+
+	/* Local vector table */
+	struct imsic_vector			*vectors;
+};
+
+struct imsic_priv {
+	/* Device details */
+	struct fwnode_handle			*fwnode;
+
+	/* Global configuration common for all HARTs */
+	struct imsic_global_config		global;
+
+	/* Per-CPU state */
+	struct imsic_local_priv __percpu	*lpriv;
+
+	/* State of IRQ matrix allocator */
+	raw_spinlock_t				matrix_lock;
+	struct irq_matrix			*matrix;
+
+	/* IRQ domains (created by platform driver) */
+	struct irq_domain			*base_domain;
+};
+
+extern struct imsic_priv *imsic;
+
+void __imsic_eix_update(unsigned long base_id, unsigned long num_id, bool pend, bool val);
+
+static inline void __imsic_id_set_enable(unsigned long id)
+{
+	__imsic_eix_update(id, 1, false, true);
+}
+
+static inline void __imsic_id_clear_enable(unsigned long id)
+{
+	__imsic_eix_update(id, 1, false, false);
+}
+
+void imsic_local_sync_all(void);
+void imsic_local_delivery(bool enable);
+
+void imsic_vector_mask(struct imsic_vector *vec);
+void imsic_vector_unmask(struct imsic_vector *vec);
+bool imsic_vector_isenabled(struct imsic_vector *vec);
+struct imsic_vector *imsic_vector_get_move(struct imsic_vector *vec);
+void imsic_vector_move(struct imsic_vector *old_vec, struct imsic_vector *new_vec);
+
+struct imsic_vector *imsic_vector_from_local_id(unsigned int cpu, unsigned int local_id);
+
+struct imsic_vector *imsic_vector_alloc(unsigned int hwirq, const struct cpumask *mask);
+void imsic_vector_free(struct imsic_vector *vector);
+
+void imsic_vector_debug_show(struct seq_file *m, struct imsic_vector *vec, int ind);
+void imsic_vector_debug_show_summary(struct seq_file *m, int ind);
+
+void imsic_state_online(void);
+void imsic_state_offline(void);
+int imsic_setup_state(struct fwnode_handle *fwnode);
+
+#endif
diff --git a/include/linux/irqchip/riscv-imsic.h b/include/linux/irqchip/riscv-imsic.h
new file mode 100644
index 000000000000..b997eb277b5b
--- /dev/null
+++ b/include/linux/irqchip/riscv-imsic.h
@@ -0,0 +1,87 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+#ifndef __LINUX_IRQCHIP_RISCV_IMSIC_H
+#define __LINUX_IRQCHIP_RISCV_IMSIC_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <asm/csr.h>
+
+#define IMSIC_MMIO_PAGE_SHIFT		12
+#define IMSIC_MMIO_PAGE_SZ		BIT(IMSIC_MMIO_PAGE_SHIFT)
+#define IMSIC_MMIO_PAGE_LE		0x00
+#define IMSIC_MMIO_PAGE_BE		0x04
+
+#define IMSIC_MIN_ID			63
+#define IMSIC_MAX_ID			2048
+
+#define IMSIC_EIDELIVERY		0x70
+
+#define IMSIC_EITHRESHOLD		0x72
+
+#define IMSIC_EIP0			0x80
+#define IMSIC_EIP63			0xbf
+#define IMSIC_EIPx_BITS			32
+
+#define IMSIC_EIE0			0xc0
+#define IMSIC_EIE63			0xff
+#define IMSIC_EIEx_BITS			32
+
+#define IMSIC_FIRST			IMSIC_EIDELIVERY
+#define IMSIC_LAST			IMSIC_EIE63
+
+#define IMSIC_MMIO_SETIPNUM_LE		0x00
+#define IMSIC_MMIO_SETIPNUM_BE		0x04
+
+struct imsic_local_config {
+	phys_addr_t				msi_pa;
+	void __iomem				*msi_va;
+};
+
+struct imsic_global_config {
+	/*
+	 * MSI Target Address Scheme
+	 *
+	 * XLEN-1                                                12     0
+	 * |                                                     |     |
+	 * -------------------------------------------------------------
+	 * |xxxxxx|Group Index|xxxxxxxxxxx|HART Index|Guest Index|  0  |
+	 * -------------------------------------------------------------
+	 */
+
+	/* Bits representing Guest index, HART index, and Group index */
+	u32					guest_index_bits;
+	u32					hart_index_bits;
+	u32					group_index_bits;
+	u32					group_index_shift;
+
+	/* Global base address matching all target MSI addresses */
+	phys_addr_t				base_addr;
+
+	/* Number of interrupt identities */
+	u32					nr_ids;
+
+	/* Number of guest interrupt identities */
+	u32					nr_guest_ids;
+
+	/* Per-CPU IMSIC addresses */
+	struct imsic_local_config __percpu	*local;
+};
+
+#ifdef CONFIG_RISCV_IMSIC
+
+extern const struct imsic_global_config *imsic_get_global_config(void);
+
+#else
+
+static inline const struct imsic_global_config *imsic_get_global_config(void)
+{
+	return NULL;
+}
+
+#endif
+
+#endif