[07/17] vfio/pci: Preserve per-interrupt contexts

Message ID d6e32e0e7adaf61da39fb6cd2863298b15a2663e.1706849424.git.reinette.chatre@intel.com
State New
Headers
Series vfio/pci: Remove duplicate code and logic from VFIO PCI interrupt management |

Commit Message

Reinette Chatre Feb. 2, 2024, 4:57 a.m. UTC
  MSI and MSI-X interrupt management for PCI passthrough devices creates
a new per-interrupt context every time an interrupt is allocated,
freeing it when the interrupt is freed.

The per-interrupt context contains the properties of a particular
interrupt. Without a property that persists across interrupt allocation
and free it is acceptable to always create a new per-interrupt context.

INTx interrupt context has a "masked" property that persists across
allocation and free and thus preserves its interrupt context
across interrupt allocation and free calls.

MSI and MSI-X interrupts already remain allocated across interrupt
allocation and free requests. Additionally maintaining the
individual interrupt context is a reflection of this existing
behavior and matches INTx behavior so that more code can be shared.

An additional benefit is that maintaining interrupt context supports
a potential future use case of emulated interrupts, where the
"is this interrupt emulated" is a property that needs to persist
across allocation and free requests.

Persistent interrupt contexts mean that the existence of a per-interrupt
context no longer implies a valid trigger, pointers to freed memory
should be cleared, and a new per-interrupt context cannot be assumed
to need allocation when an interrupt is allocated.

Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
---
Note to maintainers:
This addition originally formed part of the IMS work below that mostly
ignored INTx. This work focuses on INTx, MSI, MSI-X where this addition
is relevant.
https://lore.kernel.org/lkml/cover.1696609476.git.reinette.chatre@intel.com

 drivers/vfio/pci/vfio_pci_intrs.c | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)
  

Comments

Alex Williamson Feb. 5, 2024, 10:35 p.m. UTC | #1
On Thu,  1 Feb 2024 20:57:01 -0800
Reinette Chatre <reinette.chatre@intel.com> wrote:

> MSI and MSI-X interrupt management for PCI passthrough devices create
> a new per-interrupt context every time an interrupt is allocated,
> freeing it when the interrupt is freed.
> 
> The per-interrupt context contains the properties of a particular
> interrupt. Without a property that persists across interrupt allocation
> and free it is acceptable to always create a new per-interrupt context.
> 
> INTx interrupt context has a "masked" property that persists across
> allocation and free and thus preserves its interrupt context
> across interrupt allocation and free calls.
> 
> MSI and MSI-X interrupts already remain allocated across interrupt
> allocation and free requests, additionally maintaining the
> individual interrupt context is a reflection of this existing
> behavior and matches INTx behavior so that more code can be shared.
> 
> An additional benefit is that maintaining interrupt context supports
> a potential future use case of emulated interrupts, where the
> "is this interrupt emulated" is a property that needs to persist
> across allocation and free requests.
> 
> Persistent interrupt contexts means that existence of per-interrupt
> context no longer implies a valid trigger, pointers to freed memory
> should be cleared, and a new per-interrupt context cannot be assumed
> needing allocation when an interrupt is allocated.
> 
> Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
> ---
> Note to maintainers:
> This addition originally formed part of the IMS work below that mostly
> ignored INTx. This work focuses on INTx, MSI, MSI-X where this addition
> is relevant.
> https://lore.kernel.org/lkml/cover.1696609476.git.reinette.chatre@intel.com
> 
>  drivers/vfio/pci/vfio_pci_intrs.c | 26 ++++++++++++++------------
>  1 file changed, 14 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
> index 31f73c70fcd2..7ca2b983b66e 100644
> --- a/drivers/vfio/pci/vfio_pci_intrs.c
> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
> @@ -427,7 +427,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>  
>  	ctx = vfio_irq_ctx_get(vdev, vector);
>  
> -	if (ctx) {
> +	if (ctx && ctx->trigger) {
>  		irq_bypass_unregister_producer(&ctx->producer);
>  		irq = pci_irq_vector(pdev, vector);
>  		cmd = vfio_pci_memory_lock_and_enable(vdev);
> @@ -435,8 +435,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>  		vfio_pci_memory_unlock_and_restore(vdev, cmd);
>  		/* Interrupt stays allocated, will be freed at MSI-X disable. */
>  		kfree(ctx->name);
> +		ctx->name = NULL;

Setting ctx->name = NULL is not strictly necessary and does not match
the INTx code that we're claiming to try to emulate.  ctx->name is only
tested immediately after allocation below, otherwise it can be inferred
from ctx->trigger.  Thanks,

Alex

>  		eventfd_ctx_put(ctx->trigger);
> -		vfio_irq_ctx_free(vdev, ctx, vector);
> +		ctx->trigger = NULL;
>  	}
>  
>  	if (fd < 0)
> @@ -449,16 +450,17 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>  			return irq;
>  	}
>  
> -	ctx = vfio_irq_ctx_alloc(vdev, vector);
> -	if (!ctx)
> -		return -ENOMEM;
> +	/* Per-interrupt context remain allocated. */
> +	if (!ctx) {
> +		ctx = vfio_irq_ctx_alloc(vdev, vector);
> +		if (!ctx)
> +			return -ENOMEM;
> +	}
>  
>  	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
>  			      msix ? "x" : "", vector, pci_name(pdev));
> -	if (!ctx->name) {
> -		ret = -ENOMEM;
> -		goto out_free_ctx;
> -	}
> +	if (!ctx->name)
> +		return -ENOMEM;
>  
>  	trigger = eventfd_ctx_fdget(fd);
>  	if (IS_ERR(trigger)) {
> @@ -502,8 +504,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>  	eventfd_ctx_put(trigger);
>  out_free_name:
>  	kfree(ctx->name);
> -out_free_ctx:
> -	vfio_irq_ctx_free(vdev, ctx, vector);
> +	ctx->name = NULL;
>  	return ret;
>  }
>  
> @@ -539,6 +540,7 @@ static void vfio_msi_disable(struct vfio_pci_core_device *vdev,
>  		vfio_virqfd_disable(&ctx->unmask);
>  		vfio_virqfd_disable(&ctx->mask);
>  		vfio_msi_set_vector_signal(vdev, i, -1, index);
> +		vfio_irq_ctx_free(vdev, ctx, i);
>  	}
>  
>  	cmd = vfio_pci_memory_lock_and_enable(vdev);
> @@ -694,7 +696,7 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
>  
>  	for (i = start; i < start + count; i++) {
>  		ctx = vfio_irq_ctx_get(vdev, i);
> -		if (!ctx)
> +		if (!ctx || !ctx->trigger)
>  			continue;
>  		if (flags & VFIO_IRQ_SET_DATA_NONE) {
>  			eventfd_signal(ctx->trigger);
  
Reinette Chatre Feb. 6, 2024, 9:45 p.m. UTC | #2
Hi Alex,

On 2/5/2024 2:35 PM, Alex Williamson wrote:
> On Thu,  1 Feb 2024 20:57:01 -0800
> Reinette Chatre <reinette.chatre@intel.com> wrote:

..

>> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
>> index 31f73c70fcd2..7ca2b983b66e 100644
>> --- a/drivers/vfio/pci/vfio_pci_intrs.c
>> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
>> @@ -427,7 +427,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>>  
>>  	ctx = vfio_irq_ctx_get(vdev, vector);
>>  
>> -	if (ctx) {
>> +	if (ctx && ctx->trigger) {
>>  		irq_bypass_unregister_producer(&ctx->producer);
>>  		irq = pci_irq_vector(pdev, vector);
>>  		cmd = vfio_pci_memory_lock_and_enable(vdev);
>> @@ -435,8 +435,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>>  		vfio_pci_memory_unlock_and_restore(vdev, cmd);
>>  		/* Interrupt stays allocated, will be freed at MSI-X disable. */
>>  		kfree(ctx->name);
>> +		ctx->name = NULL;
> 
> Setting ctx->name = NULL is not strictly necessary and does not match
> the INTx code that we're claiming to try to emulate.  ctx->name is only
> tested immediately after allocation below, otherwise it can be inferred
> from ctx->trigger.  Thanks,

This all matches my understanding. I added ctx->name = NULL after every kfree(ctx->name)
(see below for confirmation of other instance). You are correct that the flow
infers validity of ctx->name from ctx->trigger. My motivation for
adding ctx->name = NULL is that, since the interrupt context persists, this
change ensures that there will be no pointer that points to freed memory. I
am not comfortable leaving pointers to freed memory around.

>>  		eventfd_ctx_put(ctx->trigger);
>> -		vfio_irq_ctx_free(vdev, ctx, vector);
>> +		ctx->trigger = NULL;
>>  	}
>>  
>>  	if (fd < 0)
>> @@ -449,16 +450,17 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>>  			return irq;
>>  	}
>>  
>> -	ctx = vfio_irq_ctx_alloc(vdev, vector);
>> -	if (!ctx)
>> -		return -ENOMEM;
>> +	/* Per-interrupt context remain allocated. */
>> +	if (!ctx) {
>> +		ctx = vfio_irq_ctx_alloc(vdev, vector);
>> +		if (!ctx)
>> +			return -ENOMEM;
>> +	}
>>  
>>  	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
>>  			      msix ? "x" : "", vector, pci_name(pdev));
>> -	if (!ctx->name) {
>> -		ret = -ENOMEM;
>> -		goto out_free_ctx;
>> -	}
>> +	if (!ctx->name)
>> +		return -ENOMEM;
>>  
>>  	trigger = eventfd_ctx_fdget(fd);
>>  	if (IS_ERR(trigger)) {
>> @@ -502,8 +504,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>>  	eventfd_ctx_put(trigger);
>>  out_free_name:
>>  	kfree(ctx->name);
>> -out_free_ctx:
>> -	vfio_irq_ctx_free(vdev, ctx, vector);
>> +	ctx->name = NULL;

Here is the other one.

Reinette
  
Alex Williamson Feb. 6, 2024, 10:03 p.m. UTC | #3
On Tue, 6 Feb 2024 13:45:22 -0800
Reinette Chatre <reinette.chatre@intel.com> wrote:

> Hi Alex,
> 
> On 2/5/2024 2:35 PM, Alex Williamson wrote:
> > On Thu,  1 Feb 2024 20:57:01 -0800
> > Reinette Chatre <reinette.chatre@intel.com> wrote:  
> 
> ..
> 
> >> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
> >> index 31f73c70fcd2..7ca2b983b66e 100644
> >> --- a/drivers/vfio/pci/vfio_pci_intrs.c
> >> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
> >> @@ -427,7 +427,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
> >>  
> >>  	ctx = vfio_irq_ctx_get(vdev, vector);
> >>  
> >> -	if (ctx) {
> >> +	if (ctx && ctx->trigger) {
> >>  		irq_bypass_unregister_producer(&ctx->producer);
> >>  		irq = pci_irq_vector(pdev, vector);
> >>  		cmd = vfio_pci_memory_lock_and_enable(vdev);
> >> @@ -435,8 +435,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
> >>  		vfio_pci_memory_unlock_and_restore(vdev, cmd);
> >>  		/* Interrupt stays allocated, will be freed at MSI-X disable. */
> >>  		kfree(ctx->name);
> >> +		ctx->name = NULL;  
> > 
> > Setting ctx->name = NULL is not strictly necessary and does not match
> > the INTx code that we're claiming to try to emulate.  ctx->name is only
> > tested immediately after allocation below, otherwise it can be inferred
> > from ctx->trigger.  Thanks,  
> 
> This all matches my understanding. I added ctx->name = NULL after every kfree(ctx->name)
> (see below for confirmation of other instance). You are correct that the flow
> infers validity of ctx->name from ctx->trigger. My motivation for
> adding ctx->name = NULL is that, since the interrupt context persists, this
> change ensures that there will be no pointer that points to freed memory. I
> am not comfortable leaving pointers to freed memory around.

Fair enough.  Maybe note the change in the commit log.  Thanks,

Alex

> >>  		eventfd_ctx_put(ctx->trigger);
> >> -		vfio_irq_ctx_free(vdev, ctx, vector);
> >> +		ctx->trigger = NULL;
> >>  	}
> >>  
> >>  	if (fd < 0)
> >> @@ -449,16 +450,17 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
> >>  			return irq;
> >>  	}
> >>  
> >> -	ctx = vfio_irq_ctx_alloc(vdev, vector);
> >> -	if (!ctx)
> >> -		return -ENOMEM;
> >> +	/* Per-interrupt context remain allocated. */
> >> +	if (!ctx) {
> >> +		ctx = vfio_irq_ctx_alloc(vdev, vector);
> >> +		if (!ctx)
> >> +			return -ENOMEM;
> >> +	}
> >>  
> >>  	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
> >>  			      msix ? "x" : "", vector, pci_name(pdev));
> >> -	if (!ctx->name) {
> >> -		ret = -ENOMEM;
> >> -		goto out_free_ctx;
> >> -	}
> >> +	if (!ctx->name)
> >> +		return -ENOMEM;
> >>  
> >>  	trigger = eventfd_ctx_fdget(fd);
> >>  	if (IS_ERR(trigger)) {
> >> @@ -502,8 +504,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
> >>  	eventfd_ctx_put(trigger);
> >>  out_free_name:
> >>  	kfree(ctx->name);
> >> -out_free_ctx:
> >> -	vfio_irq_ctx_free(vdev, ctx, vector);
> >> +	ctx->name = NULL;  
> 
> Here is the other one.
> 
> Reinette
>
  
Reinette Chatre Feb. 6, 2024, 10:21 p.m. UTC | #4
Hi Alex,

On 2/6/2024 2:03 PM, Alex Williamson wrote:
> On Tue, 6 Feb 2024 13:45:22 -0800
> Reinette Chatre <reinette.chatre@intel.com> wrote:
>> On 2/5/2024 2:35 PM, Alex Williamson wrote:
>>> On Thu,  1 Feb 2024 20:57:01 -0800
>>> Reinette Chatre <reinette.chatre@intel.com> wrote:  
>>
>> ..
>>
>>>> diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
>>>> index 31f73c70fcd2..7ca2b983b66e 100644
>>>> --- a/drivers/vfio/pci/vfio_pci_intrs.c
>>>> +++ b/drivers/vfio/pci/vfio_pci_intrs.c
>>>> @@ -427,7 +427,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>>>>  
>>>>  	ctx = vfio_irq_ctx_get(vdev, vector);
>>>>  
>>>> -	if (ctx) {
>>>> +	if (ctx && ctx->trigger) {
>>>>  		irq_bypass_unregister_producer(&ctx->producer);
>>>>  		irq = pci_irq_vector(pdev, vector);
>>>>  		cmd = vfio_pci_memory_lock_and_enable(vdev);
>>>> @@ -435,8 +435,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
>>>>  		vfio_pci_memory_unlock_and_restore(vdev, cmd);
>>>>  		/* Interrupt stays allocated, will be freed at MSI-X disable. */
>>>>  		kfree(ctx->name);
>>>> +		ctx->name = NULL;  
>>>
>>> Setting ctx->name = NULL is not strictly necessary and does not match
>>> the INTx code that we're claiming to try to emulate.  ctx->name is only
>>> tested immediately after allocation below, otherwise it can be inferred
>>> from ctx->trigger.  Thanks,  
>>
>> This all matches my understanding. I added ctx->name = NULL after every kfree(ctx->name)
>> (see below for confirmation of other instance). You are correct that the flow
>> infers validity of ctx->name from ctx->trigger. My motivation for
>> adding ctx->name = NULL is that, since the interrupt context persists, this
>> change ensures that there will be no pointer that points to freed memory. I
>> am not comfortable leaving pointers to freed memory around.
> 
> Fair enough.  Maybe note the change in the commit log.  Thanks,
> 

Will do. Thank you.

Reinette
  

Patch

diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 31f73c70fcd2..7ca2b983b66e 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -427,7 +427,7 @@  static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 
 	ctx = vfio_irq_ctx_get(vdev, vector);
 
-	if (ctx) {
+	if (ctx && ctx->trigger) {
 		irq_bypass_unregister_producer(&ctx->producer);
 		irq = pci_irq_vector(pdev, vector);
 		cmd = vfio_pci_memory_lock_and_enable(vdev);
@@ -435,8 +435,9 @@  static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 		vfio_pci_memory_unlock_and_restore(vdev, cmd);
 		/* Interrupt stays allocated, will be freed at MSI-X disable. */
 		kfree(ctx->name);
+		ctx->name = NULL;
 		eventfd_ctx_put(ctx->trigger);
-		vfio_irq_ctx_free(vdev, ctx, vector);
+		ctx->trigger = NULL;
 	}
 
 	if (fd < 0)
@@ -449,16 +450,17 @@  static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 			return irq;
 	}
 
-	ctx = vfio_irq_ctx_alloc(vdev, vector);
-	if (!ctx)
-		return -ENOMEM;
+	/* Per-interrupt context remain allocated. */
+	if (!ctx) {
+		ctx = vfio_irq_ctx_alloc(vdev, vector);
+		if (!ctx)
+			return -ENOMEM;
+	}
 
 	ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
 			      msix ? "x" : "", vector, pci_name(pdev));
-	if (!ctx->name) {
-		ret = -ENOMEM;
-		goto out_free_ctx;
-	}
+	if (!ctx->name)
+		return -ENOMEM;
 
 	trigger = eventfd_ctx_fdget(fd);
 	if (IS_ERR(trigger)) {
@@ -502,8 +504,7 @@  static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
 	eventfd_ctx_put(trigger);
 out_free_name:
 	kfree(ctx->name);
-out_free_ctx:
-	vfio_irq_ctx_free(vdev, ctx, vector);
+	ctx->name = NULL;
 	return ret;
 }
 
@@ -539,6 +540,7 @@  static void vfio_msi_disable(struct vfio_pci_core_device *vdev,
 		vfio_virqfd_disable(&ctx->unmask);
 		vfio_virqfd_disable(&ctx->mask);
 		vfio_msi_set_vector_signal(vdev, i, -1, index);
+		vfio_irq_ctx_free(vdev, ctx, i);
 	}
 
 	cmd = vfio_pci_memory_lock_and_enable(vdev);
@@ -694,7 +696,7 @@  static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
 
 	for (i = start; i < start + count; i++) {
 		ctx = vfio_irq_ctx_get(vdev, i);
-		if (!ctx)
+		if (!ctx || !ctx->trigger)
 			continue;
 		if (flags & VFIO_IRQ_SET_DATA_NONE) {
 			eventfd_signal(ctx->trigger);