[v11,24/26] virt: gunyah: Add irqfd interface

Message ID 20230304010632.2127470-25-quic_eberman@quicinc.com
State New
Headers
Series Drivers for gunyah hypervisor |

Commit Message

Elliot Berman March 4, 2023, 1:06 a.m. UTC
  Enable support for creating irqfds which can raise an interrupt on a
Gunyah virtual machine. irqfds are exposed to userspace as a Gunyah VM
function with the name "irqfd". If the VM devicetree is not configured
to create a doorbell with the corresponding label, userspace will still
be able to assert the eventfd but no interrupt will be raised on the
guest.

Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
 Documentation/virt/gunyah/vm-manager.rst |   2 +-
 drivers/virt/gunyah/Kconfig              |   9 ++
 drivers/virt/gunyah/Makefile             |   1 +
 drivers/virt/gunyah/gunyah_irqfd.c       | 164 +++++++++++++++++++++++
 include/linux/gunyah.h                   |   5 +
 include/uapi/linux/gunyah.h              |  30 +++++
 6 files changed, 210 insertions(+), 1 deletion(-)
 create mode 100644 drivers/virt/gunyah/gunyah_irqfd.c
  

Comments

Alex Elder March 31, 2023, 2:27 p.m. UTC | #1
On 3/3/23 7:06 PM, Elliot Berman wrote:
> Enable support for creating irqfds which can raise an interrupt on a
> Gunyah virtual machine. irqfds are exposed to userspace as a Gunyah VM
> function with the name "irqfd". If the VM devicetree is not configured
> to create a doorbell with the corresponding label, userspace will still
> be able to assert the eventfd but no interrupt will be raised on the
> guest.
> 
> Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>

I suggest a few things below, including some code simplification.
I also have a few questions (which could possibly be answered by
adding comments).

					-Alex
> ---
>   Documentation/virt/gunyah/vm-manager.rst |   2 +-
>   drivers/virt/gunyah/Kconfig              |   9 ++
>   drivers/virt/gunyah/Makefile             |   1 +
>   drivers/virt/gunyah/gunyah_irqfd.c       | 164 +++++++++++++++++++++++
>   include/linux/gunyah.h                   |   5 +
>   include/uapi/linux/gunyah.h              |  30 +++++
>   6 files changed, 210 insertions(+), 1 deletion(-)
>   create mode 100644 drivers/virt/gunyah/gunyah_irqfd.c
> 
> diff --git a/Documentation/virt/gunyah/vm-manager.rst b/Documentation/virt/gunyah/vm-manager.rst
> index 83d326b0d11f..a1dd70f0cbf6 100644
> --- a/Documentation/virt/gunyah/vm-manager.rst
> +++ b/Documentation/virt/gunyah/vm-manager.rst
> @@ -124,7 +124,7 @@ the VM starts.
>   The possible types are documented below:
>   
>   .. kernel-doc:: include/uapi/linux/gunyah.h
> -   :identifiers: GH_FN_VCPU gh_fn_vcpu_arg
> +   :identifiers: GH_FN_VCPU gh_fn_vcpu_arg GH_FN_IRQFD gh_fn_irqfd_arg
>   
>   Gunyah VCPU API Descriptions
>   ----------------------------
> diff --git a/drivers/virt/gunyah/Kconfig b/drivers/virt/gunyah/Kconfig
> index 4c1c6110b50e..2cde24d429d1 100644
> --- a/drivers/virt/gunyah/Kconfig
> +++ b/drivers/virt/gunyah/Kconfig
> @@ -26,3 +26,12 @@ config GUNYAH_VCPU
>   	  VMMs can also handle stage 2 faults of the vCPUs.
>   
>   	  Say Y/M here if unsure and you want to support Gunyah VMMs.
> +
> +config GUNYAH_IRQFD
> +	tristate "Gunyah irqfd interface"
> +	depends on GUNYAH
> +	help
> +	  Enable kernel support for creating irqfds which can raise an interrupt
> +	  on Gunyah virtual machine.
> +
> +	  Say Y/M here if unsure and you want to support Gunyah VMMs.
> diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
> index 2d1b604a7b03..6cf756bfa3c2 100644
> --- a/drivers/virt/gunyah/Makefile
> +++ b/drivers/virt/gunyah/Makefile
> @@ -7,3 +7,4 @@ gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
>   obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
>   
>   obj-$(CONFIG_GUNYAH_VCPU) += gunyah_vcpu.o
> +obj-$(CONFIG_GUNYAH_IRQFD) += gunyah_irqfd.o
> diff --git a/drivers/virt/gunyah/gunyah_irqfd.c b/drivers/virt/gunyah/gunyah_irqfd.c
> new file mode 100644
> index 000000000000..38e5fe266b00
> --- /dev/null
> +++ b/drivers/virt/gunyah/gunyah_irqfd.c
> @@ -0,0 +1,164 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
> + */
> +
> +#include <linux/eventfd.h>
> +#include <linux/file.h>
> +#include <linux/fs.h>
> +#include <linux/gunyah.h>
> +#include <linux/gunyah_vm_mgr.h>
> +#include <linux/module.h>
> +#include <linux/poll.h>
> +#include <linux/printk.h>
> +
> +#include <uapi/linux/gunyah.h>
> +
> +struct gh_irqfd {
> +	struct gh_resource *ghrsc;
> +	struct gh_vm_resource_ticket ticket;
> +	struct gh_vm_function_instance *f;
> +
> +	bool level;
> +
> +	struct eventfd_ctx *ctx;
> +	wait_queue_entry_t wait;
> +	poll_table pt;
> +};
> +
> +static int irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
> +{
> +	struct gh_irqfd *irqfd = container_of(wait, struct gh_irqfd, wait);
> +	__poll_t flags = key_to_poll(key);
> +	u64 enable_mask = GH_BELL_NONBLOCK;
> +	u64 old_flags;
> +	int ret = 0;
> +
> +	if (flags & EPOLLIN) {
> +		if (irqfd->ghrsc) {
> +			ret = gh_hypercall_bell_send(irqfd->ghrsc->capid, enable_mask, &old_flags);

I commented elsewhere that you might support passing a null
pointer as the last argument above (since you don't use the
result).

> +			if (ret)
> +				pr_err_ratelimited("Failed to inject interrupt %d: %d\n",
> +						irqfd->ticket.label, ret);
> +		} else
> +			pr_err_ratelimited("Premature injection of interrupt\n");
> +	}
> +
> +	return 0;
> +}
> +
> +static void irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
> +{
> +	struct gh_irqfd *irq_ctx = container_of(pt, struct gh_irqfd, pt);
> +
> +	add_wait_queue(wqh, &irq_ctx->wait);
> +}
> +
> +static int gh_irqfd_populate(struct gh_vm_resource_ticket *ticket, struct gh_resource *ghrsc)
> +{
> +	struct gh_irqfd *irqfd = container_of(ticket, struct gh_irqfd, ticket);
> +	u64 enable_mask = GH_BELL_NONBLOCK;
> +	u64 ack_mask = ~0;

Why is the ACK mask ~0?

I guess I don't know details about this hypercall (do you document
them somewhere?), so it's hard to judge whether or why this is the
right thing to use.  The enable_mask is just GH_BELL_NONBLOCK,
which is just BIT(32).

> +	int ret = 0;
> +
> +	if (irqfd->ghrsc) {
> +		pr_warn("irqfd%d already got a Gunyah resource. Check if multiple resources with same label were configured.\n",

s/%d/%u/

> +			irqfd->ticket.label);
> +		return -1;

I would say you should return -EBUSY here instead.

However, all callers just check for a zero/nonzero result, so
you could instead have this function (and the pointer it's
assigned to) return a Boolean (true on success).

> +	}
> +
> +	irqfd->ghrsc = ghrsc;
> +	if (irqfd->level) {

I think I don't understand this part of the code well
enough to know this.  What happens if level is false?

> +		ret = gh_hypercall_bell_set_mask(irqfd->ghrsc->capid, enable_mask, ack_mask);
> +		if (ret)
> +			pr_warn("irq %d couldn't be set as level triggered. Might cause IRQ storm if asserted\n",
> +				irqfd->ticket.label);
> +	}
> +
> +	return 0;
> +}
> +
> +static void gh_irqfd_unpopulate(struct gh_vm_resource_ticket *ticket, struct gh_resource *ghrsc)
> +{
> +	struct gh_irqfd *irqfd = container_of(ticket, struct gh_irqfd, ticket);
> +	u64 cnt;
> +
> +	eventfd_ctx_remove_wait_queue(irqfd->ctx, &irqfd->wait, &cnt);
> +}
> +
> +static long gh_irqfd_bind(struct gh_vm_function_instance *f)
> +{
> +	struct gh_fn_irqfd_arg *args = f->argp;
> +	struct gh_irqfd *irqfd;
> +	__poll_t events;
> +	struct fd fd;
> +	long r;
> +
> +	if (f->arg_size != sizeof(*args))
> +		return -EINVAL;
> +
> +	/* All other flag bits are reserved for future use */
> +	if (args->flags & ~GH_IRQFD_LEVEL)
> +		return -EINVAL;
> +
> +	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
> +	if (!irqfd)
> +		return -ENOMEM;
> +
> +	irqfd->f = f;
> +	f->data = irqfd;
> +

In the next section you get a temporary reference to the FD,
then look up the eventfd context from its file.  But in
gh_ioeventfd_bind() you just call eventfd_ctx_fdget().

I *think* you can do the same here, but perhaps I'm missing
something.


> +	fd = fdget(args->fd);
> +	if (!fd.file) {
> +		kfree(irqfd);
> +		return -EBADF;
> +	}
> +
> +	irqfd->ctx = eventfd_ctx_fileget(fd.file);
> +	if (IS_ERR(irqfd->ctx)) {
> +		r = PTR_ERR(irqfd->ctx);
> +		goto err_fdput;
> +	}
> +

I.e., rather than the two function calls above, you could just
call:

	irqfd->ctx = eventfd_ctx_fdget(args->fd);

And in that case you also wouldn't need the fdput() call in the
error path below.

> +	if (args->flags & GH_IRQFD_LEVEL)
> +		irqfd->level = true;
> +
> +	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
> +	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
> +
> +	irqfd->ticket.resource_type = GH_RESOURCE_TYPE_BELL_TX;
> +	irqfd->ticket.label = args->label;
> +	irqfd->ticket.owner = THIS_MODULE;
> +	irqfd->ticket.populate = gh_irqfd_populate;
> +	irqfd->ticket.unpopulate = gh_irqfd_unpopulate;
> +
> +	r = gh_vm_add_resource_ticket(f->ghvm, &irqfd->ticket);
> +	if (r)
> +		goto err_ctx;
> +
> +	events = vfs_poll(fd.file, &irqfd->pt);
> +	if (events & EPOLLIN)
> +		pr_warn("Premature injection of interrupt\n");
> +	fdput(fd);
> +
> +	return 0;
> +err_ctx:
> +	eventfd_ctx_put(irqfd->ctx);
> +err_fdput:
> +	fdput(fd);
> +	kfree(irqfd);
> +	return r;
> +}
> +
> +static void gh_irqfd_unbind(struct gh_vm_function_instance *f)
> +{
> +	struct gh_irqfd *irqfd = f->data;
> +
> +	gh_vm_remove_resource_ticket(irqfd->f->ghvm, &irqfd->ticket);
> +	eventfd_ctx_put(irqfd->ctx);
> +	kfree(irqfd);
> +}
> +
> +DECLARE_GH_VM_FUNCTION_INIT(irqfd, GH_FN_IRQFD, gh_irqfd_bind, gh_irqfd_unbind);
> +MODULE_DESCRIPTION("Gunyah irqfds");

Maybe singular, and maybe "Gunyah irqfd VM function(s)".

> +MODULE_LICENSE("GPL");
> diff --git a/include/linux/gunyah.h b/include/linux/gunyah.h
> index 63395dacc1a8..0344b6988cfa 100644
> --- a/include/linux/gunyah.h
> +++ b/include/linux/gunyah.h
> @@ -33,6 +33,11 @@ struct gh_resource {
>   	u32 rm_label;
>   };
>   
> +/**
> + * Gunyah Doorbells
> + */
> +#define GH_BELL_NONBLOCK		BIT(32)
> +
>   /**
>    * Gunyah Message Queues
>    */
> diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
> index e52265fa5715..5617dadc1c7b 100644
> --- a/include/uapi/linux/gunyah.h
> +++ b/include/uapi/linux/gunyah.h
> @@ -76,6 +76,19 @@ struct gh_vm_dtb_config {
>    */
>   #define GH_FN_VCPU 		1
>   
> +/**
> + * GH_FN_IRQFD - register eventfd to assert a Gunyah doorbell
> + *
> + * gh_fn_desc is filled with gh_fn_irqfd_arg
> + *
> + * Allows setting an eventfd to directly trigger a guest interrupt.
> + * irqfd.fd specifies the file descriptor to use as the eventfd.
> + * irqfd.label corresponds to the doorbell label used in the guest VM's devicetree.
> + *
> + * Return: 0
> + */
> +#define GH_FN_IRQFD 		2
> +
>   #define GH_FN_MAX_ARG_SIZE		256
>   
>   /**
> @@ -88,6 +101,23 @@ struct gh_fn_vcpu_arg {
>   
>   #define GH_IRQFD_LEVEL			(1UL << 0)

This is associated with the IRQFD "flags" field, so I'd name it
GH_IRQFD_FLAGS_LEVEL.

>   
> +/**
> + * struct gh_fn_irqfd_arg - Arguments to create an irqfd function
> + * @fd: an eventfd which when written to will raise a doorbell
> + * @label: Label of the doorbell created on the guest VM
> + * @flags: GH_IRQFD_LEVEL configures the corresponding doorbell to behave
> + *         like a level triggered interrupt.
> + * @padding: padding bytes
> + */
> +struct gh_fn_irqfd_arg {
> +	__u32 fd;

Should the "fd" field be signed?  Should it be an int?  (Perhaps
you're trying to define a fixed kernel API, so __s32 if signed would
be better.)

> +	__u32 label;
> +	__u32 flags;
> +	__u32 padding;
> +};
> +
> +#define GH_IOEVENTFD_DATAMATCH		(1UL << 0)
> +
>   /**
>    * struct gh_fn_desc - Arguments to create a VM function
>    * @type: Type of the function. See GH_FN_* macro for supported types
  
Elliot Berman April 17, 2023, 10:55 p.m. UTC | #2
On 3/31/2023 7:27 AM, Alex Elder wrote:
> On 3/3/23 7:06 PM, Elliot Berman wrote:

[snip]

>> +
>> +static int irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, 
>> int sync, void *key)
>> +{
>> +    struct gh_irqfd *irqfd = container_of(wait, struct gh_irqfd, wait);
>> +    __poll_t flags = key_to_poll(key);
>> +    u64 enable_mask = GH_BELL_NONBLOCK;
>> +    u64 old_flags;
>> +    int ret = 0;
>> +
>> +    if (flags & EPOLLIN) {
>> +        if (irqfd->ghrsc) {
>> +            ret = gh_hypercall_bell_send(irqfd->ghrsc->capid, 
>> enable_mask, &old_flags);
> 
> I commented elsewhere that you might support passing a null
> pointer as the last argument above (since you don't use the
> result).
> 
>> +            if (ret)
>> +                pr_err_ratelimited("Failed to inject interrupt %d: 
>> %d\n",
>> +                        irqfd->ticket.label, ret);
>> +        } else
>> +            pr_err_ratelimited("Premature injection of interrupt\n");
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void irqfd_ptable_queue_proc(struct file *file, 
>> wait_queue_head_t *wqh, poll_table *pt)
>> +{
>> +    struct gh_irqfd *irq_ctx = container_of(pt, struct gh_irqfd, pt);
>> +
>> +    add_wait_queue(wqh, &irq_ctx->wait);
>> +}
>> +
>> +static int gh_irqfd_populate(struct gh_vm_resource_ticket *ticket, 
>> struct gh_resource *ghrsc)
>> +{
>> +    struct gh_irqfd *irqfd = container_of(ticket, struct gh_irqfd, 
>> ticket);
>> +    u64 enable_mask = GH_BELL_NONBLOCK;
>> +    u64 ack_mask = ~0;
> 
> Why is the ACK mask ~0?
> 
> I guess I don't know details about this hypercall (do you document
> them somewhere?), so it's hard to judge whether or why this is the
> right thing to use.  The enable_mask is just GH_BELL_NONBLOCK,
> which is just BIT(32).
> 

I talked to our hypervisor folks and they mentioned we can simplify 
this. In v12, enable_mask and ack_mask can just be "1" (BIT(0)). We had 
chosen bit 32 arbitrarily.

[snip]

> 
>> +    }
>> +
>> +    irqfd->ghrsc = ghrsc;
>> +    if (irqfd->level) {
> 
> I think I don't understand this part of the code well
> enough to know this.  What happens if level is false?
> 

If level is false, then the guest is assumed to set up the IRQ on its
side as edge-triggered. In that case, we don't need to configure the
enable mask/ack mask because the doorbell flags aren't polled.

[snip]

>> +/**
>> + * struct gh_fn_irqfd_arg - Arguments to create an irqfd function
>> + * @fd: an eventfd which when written to will raise a doorbell
>> + * @label: Label of the doorbell created on the guest VM
>> + * @flags: GH_IRQFD_LEVEL configures the corresponding doorbell to 
>> behave
>> + *         like a level triggered interrupt.
>> + * @padding: padding bytes
>> + */
>> +struct gh_fn_irqfd_arg {
>> +    __u32 fd;
> 
> Should the "fd" field be signed?  Should it be an int?  (Perhaps
> you're trying to define a fixed kernel API, so __s32 if signed would
> be better.)
> 

It looked to me like some interfaces use __u32 and some use __s32. Is 
one technically correct?
  
Alex Elder April 18, 2023, 12:55 p.m. UTC | #3
On 4/17/23 5:55 PM, Elliot Berman wrote:
>>>
>>> +struct gh_fn_irqfd_arg {
>>> +    __u32 fd;
>>
>> Should the "fd" field be signed?  Should it be an int?  (Perhaps
>> you're trying to define a fixed kernel API, so __s32 if signed would
>> be better.)
>>
> 
> It looked to me like some interfaces use __u32 and some use __s32. Is 
> one technically correct?

Good question.  It depends on how you use it.

It's a file descriptor, so it should be an int, and it appears
that's always a 32-bit signed integer (on both 32-bit and 64-bit
machines).  So the size seems to be right.

Whether it's signed or not I think depends on whether you
ever save an error value in this field.  I doubt you do,
but if you do, it should be signed.  Otherwise, the largest
value will never exceed INT_MAX/S32_MAX; and in that case
either is fine.

Will Gunyah ever run on a 32-bit machine?

					-Alex
  

Patch

diff --git a/Documentation/virt/gunyah/vm-manager.rst b/Documentation/virt/gunyah/vm-manager.rst
index 83d326b0d11f..a1dd70f0cbf6 100644
--- a/Documentation/virt/gunyah/vm-manager.rst
+++ b/Documentation/virt/gunyah/vm-manager.rst
@@ -124,7 +124,7 @@  the VM starts.
 The possible types are documented below:
 
 .. kernel-doc:: include/uapi/linux/gunyah.h
-   :identifiers: GH_FN_VCPU gh_fn_vcpu_arg
+   :identifiers: GH_FN_VCPU gh_fn_vcpu_arg GH_FN_IRQFD gh_fn_irqfd_arg
 
 Gunyah VCPU API Descriptions
 ----------------------------
diff --git a/drivers/virt/gunyah/Kconfig b/drivers/virt/gunyah/Kconfig
index 4c1c6110b50e..2cde24d429d1 100644
--- a/drivers/virt/gunyah/Kconfig
+++ b/drivers/virt/gunyah/Kconfig
@@ -26,3 +26,12 @@  config GUNYAH_VCPU
 	  VMMs can also handle stage 2 faults of the vCPUs.
 
 	  Say Y/M here if unsure and you want to support Gunyah VMMs.
+
+config GUNYAH_IRQFD
+	tristate "Gunyah irqfd interface"
+	depends on GUNYAH
+	help
+	  Enable kernel support for creating irqfds which can raise an interrupt
+	  on Gunyah virtual machine.
+
+	  Say Y/M here if unsure and you want to support Gunyah VMMs.
diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
index 2d1b604a7b03..6cf756bfa3c2 100644
--- a/drivers/virt/gunyah/Makefile
+++ b/drivers/virt/gunyah/Makefile
@@ -7,3 +7,4 @@  gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
 obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
 
 obj-$(CONFIG_GUNYAH_VCPU) += gunyah_vcpu.o
+obj-$(CONFIG_GUNYAH_IRQFD) += gunyah_irqfd.o
diff --git a/drivers/virt/gunyah/gunyah_irqfd.c b/drivers/virt/gunyah/gunyah_irqfd.c
new file mode 100644
index 000000000000..38e5fe266b00
--- /dev/null
+++ b/drivers/virt/gunyah/gunyah_irqfd.c
@@ -0,0 +1,164 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/gunyah.h>
+#include <linux/gunyah_vm_mgr.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/printk.h>
+
+#include <uapi/linux/gunyah.h>
+
+struct gh_irqfd {
+	struct gh_resource *ghrsc;
+	struct gh_vm_resource_ticket ticket;
+	struct gh_vm_function_instance *f;
+
+	bool level;
+
+	struct eventfd_ctx *ctx;
+	wait_queue_entry_t wait;
+	poll_table pt;
+};
+
+static int irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
+{
+	struct gh_irqfd *irqfd = container_of(wait, struct gh_irqfd, wait);
+	__poll_t flags = key_to_poll(key);
+	u64 enable_mask = GH_BELL_NONBLOCK;
+	u64 old_flags;
+	int ret = 0;
+
+	if (flags & EPOLLIN) {
+		if (irqfd->ghrsc) {
+			ret = gh_hypercall_bell_send(irqfd->ghrsc->capid, enable_mask, &old_flags);
+			if (ret)
+				pr_err_ratelimited("Failed to inject interrupt %d: %d\n",
+						irqfd->ticket.label, ret);
+		} else
+			pr_err_ratelimited("Premature injection of interrupt\n");
+	}
+
+	return 0;
+}
+
+static void irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
+{
+	struct gh_irqfd *irq_ctx = container_of(pt, struct gh_irqfd, pt);
+
+	add_wait_queue(wqh, &irq_ctx->wait);
+}
+
+static int gh_irqfd_populate(struct gh_vm_resource_ticket *ticket, struct gh_resource *ghrsc)
+{
+	struct gh_irqfd *irqfd = container_of(ticket, struct gh_irqfd, ticket);
+	u64 enable_mask = GH_BELL_NONBLOCK;
+	u64 ack_mask = ~0;
+	int ret = 0;
+
+	if (irqfd->ghrsc) {
+		pr_warn("irqfd%d already got a Gunyah resource. Check if multiple resources with same label were configured.\n",
+			irqfd->ticket.label);
+		return -1;
+	}
+
+	irqfd->ghrsc = ghrsc;
+	if (irqfd->level) {
+		ret = gh_hypercall_bell_set_mask(irqfd->ghrsc->capid, enable_mask, ack_mask);
+		if (ret)
+			pr_warn("irq %d couldn't be set as level triggered. Might cause IRQ storm if asserted\n",
+				irqfd->ticket.label);
+	}
+
+	return 0;
+}
+
+static void gh_irqfd_unpopulate(struct gh_vm_resource_ticket *ticket, struct gh_resource *ghrsc)
+{
+	struct gh_irqfd *irqfd = container_of(ticket, struct gh_irqfd, ticket);
+	u64 cnt;
+
+	eventfd_ctx_remove_wait_queue(irqfd->ctx, &irqfd->wait, &cnt);
+}
+
+static long gh_irqfd_bind(struct gh_vm_function_instance *f)
+{
+	struct gh_fn_irqfd_arg *args = f->argp;
+	struct gh_irqfd *irqfd;
+	__poll_t events;
+	struct fd fd;
+	long r;
+
+	if (f->arg_size != sizeof(*args))
+		return -EINVAL;
+
+	/* All other flag bits are reserved for future use */
+	if (args->flags & ~GH_IRQFD_LEVEL)
+		return -EINVAL;
+
+	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
+	if (!irqfd)
+		return -ENOMEM;
+
+	irqfd->f = f;
+	f->data = irqfd;
+
+	fd = fdget(args->fd);
+	if (!fd.file) {
+		kfree(irqfd);
+		return -EBADF;
+	}
+
+	irqfd->ctx = eventfd_ctx_fileget(fd.file);
+	if (IS_ERR(irqfd->ctx)) {
+		r = PTR_ERR(irqfd->ctx);
+		goto err_fdput;
+	}
+
+	if (args->flags & GH_IRQFD_LEVEL)
+		irqfd->level = true;
+
+	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
+	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);
+
+	irqfd->ticket.resource_type = GH_RESOURCE_TYPE_BELL_TX;
+	irqfd->ticket.label = args->label;
+	irqfd->ticket.owner = THIS_MODULE;
+	irqfd->ticket.populate = gh_irqfd_populate;
+	irqfd->ticket.unpopulate = gh_irqfd_unpopulate;
+
+	r = gh_vm_add_resource_ticket(f->ghvm, &irqfd->ticket);
+	if (r)
+		goto err_ctx;
+
+	events = vfs_poll(fd.file, &irqfd->pt);
+	if (events & EPOLLIN)
+		pr_warn("Premature injection of interrupt\n");
+	fdput(fd);
+
+	return 0;
+err_ctx:
+	eventfd_ctx_put(irqfd->ctx);
+err_fdput:
+	fdput(fd);
+	kfree(irqfd);
+	return r;
+}
+
+static void gh_irqfd_unbind(struct gh_vm_function_instance *f)
+{
+	struct gh_irqfd *irqfd = f->data;
+
+	gh_vm_remove_resource_ticket(irqfd->f->ghvm, &irqfd->ticket);
+	eventfd_ctx_put(irqfd->ctx);
+	kfree(irqfd);
+}
+
+DECLARE_GH_VM_FUNCTION_INIT(irqfd, GH_FN_IRQFD, gh_irqfd_bind, gh_irqfd_unbind);
+MODULE_DESCRIPTION("Gunyah irqfds");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/gunyah.h b/include/linux/gunyah.h
index 63395dacc1a8..0344b6988cfa 100644
--- a/include/linux/gunyah.h
+++ b/include/linux/gunyah.h
@@ -33,6 +33,11 @@  struct gh_resource {
 	u32 rm_label;
 };
 
+/**
+ * Gunyah Doorbells
+ */
+#define GH_BELL_NONBLOCK		BIT(32)
+
 /**
  * Gunyah Message Queues
  */
diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
index e52265fa5715..5617dadc1c7b 100644
--- a/include/uapi/linux/gunyah.h
+++ b/include/uapi/linux/gunyah.h
@@ -76,6 +76,19 @@  struct gh_vm_dtb_config {
  */
 #define GH_FN_VCPU 		1
 
+/**
+ * GH_FN_IRQFD - register eventfd to assert a Gunyah doorbell
+ *
+ * gh_fn_desc is filled with gh_fn_irqfd_arg
+ *
+ * Allows setting an eventfd to directly trigger a guest interrupt.
+ * irqfd.fd specifies the file descriptor to use as the eventfd.
+ * irqfd.label corresponds to the doorbell label used in the guest VM's devicetree.
+ *
+ * Return: 0
+ */
+#define GH_FN_IRQFD 		2
+
 #define GH_FN_MAX_ARG_SIZE		256
 
 /**
@@ -88,6 +101,23 @@  struct gh_fn_vcpu_arg {
 
 #define GH_IRQFD_LEVEL			(1UL << 0)
 
+/**
+ * struct gh_fn_irqfd_arg - Arguments to create an irqfd function
+ * @fd: an eventfd which when written to will raise a doorbell
+ * @label: Label of the doorbell created on the guest VM
+ * @flags: GH_IRQFD_LEVEL configures the corresponding doorbell to behave
+ *         like a level triggered interrupt.
+ * @padding: padding bytes
+ */
+struct gh_fn_irqfd_arg {
+	__u32 fd;
+	__u32 label;
+	__u32 flags;
+	__u32 padding;
+};
+
+#define GH_IOEVENTFD_DATAMATCH		(1UL << 0)
+
 /**
  * struct gh_fn_desc - Arguments to create a VM function
  * @type: Type of the function. See GH_FN_* macro for supported types