[RFC] vdpa/mlx5: preserve CVQ vringh index

Message ID 1698350834-415881-1-git-send-email-steven.sistare@oracle.com
State New
Headers
Series [RFC] vdpa/mlx5: preserve CVQ vringh index |

Commit Message

Steven Sistare Oct. 26, 2023, 8:07 p.m. UTC
  mlx5_vdpa does not preserve userland's view of vring base for the control
queue in the following sequence:

ioctl VHOST_SET_VRING_BASE
ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
  mlx5_vdpa_set_status()
    setup_cvq_vring()
      vringh_init_iotlb()
        vringh_init_kern()
          vrh->last_avail_idx = 0;
ioctl VHOST_GET_VRING_BASE

To fix, restore the value of cvq->vring.last_avail_idx after calling
vringh_init_iotlb.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c |  7 ++++++-
 drivers/vhost/vringh.c            | 30 ++++++++++++++++++++++++++++++
 include/linux/vringh.h            |  2 ++
 3 files changed, 38 insertions(+), 1 deletion(-)
  

Comments

Jason Wang Oct. 27, 2023, 6:31 a.m. UTC | #1
On Fri, Oct 27, 2023 at 4:07 AM Steve Sistare <steven.sistare@oracle.com> wrote:
>
> mlx5_vdpa does not preserve userland's view of vring base for the control
> queue in the following sequence:
>
> ioctl VHOST_SET_VRING_BASE
> ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
>   mlx5_vdpa_set_status()
>     setup_cvq_vring()
>       vringh_init_iotlb()
>         vringh_init_kern()
>           vrh->last_avail_idx = 0;
> ioctl VHOST_GET_VRING_BASE
>
> To fix, restore the value of cvq->vring.last_avail_idx after calling
> vringh_init_iotlb.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c |  7 ++++++-
>  drivers/vhost/vringh.c            | 30 ++++++++++++++++++++++++++++++
>  include/linux/vringh.h            |  2 ++
>  3 files changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 946488b8989f..f64758143115 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2795,13 +2795,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>         struct mlx5_control_vq *cvq = &mvdev->cvq;
>         int err = 0;
>
> -       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
> +       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> +               u16 last_avail_idx = cvq->vring.last_avail_idx;
> +
>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>                                         MLX5_CVQ_MAX_ENT, false,
>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>
> +               if (!err)
> +                       vringh_set_base_iotlb(&cvq->vring, last_avail_idx);

Btw, vringh_set_base_iotlb() deserves an independent patch and it
seems it is not specific to IOTLB, so we probably need an indirection
to have vringh_set_base() first.

Or I wonder if it's better to just introduce a new parameter to
vringh_init_iotlb()...

Thanks

> +       }
>         return err;
>  }
>
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 7b8fd977f71c..799762c83007 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -595,6 +595,24 @@ static inline void __vringh_notify_disable(struct vringh *vrh,
>         }
>  }
>
> +static inline int __vringh_set_base(struct vringh *vrh, u16 idx,
> +                           int (*putu16)(const struct vringh *vrh,
> +                               __virtio16 *p, u16 val))
> +{
> +    int ret;
> +
> +    ret = putu16(vrh, &vrh->vring.avail->idx, idx);
> +    if (ret)
> +        return ret;
> +
> +    ret = putu16(vrh, &vrh->vring.used->idx, idx);
> +    if (ret)
> +        return ret;
> +
> +    vrh->last_avail_idx = vrh->last_used_idx = idx;
> +    return 0;
> +}
> +
>  /* Userspace access helpers: in this case, addresses are really userspace. */
>  static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
>  {
> @@ -1456,6 +1474,18 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>  }
>  EXPORT_SYMBOL(vringh_set_iotlb);
>
> +/**
> + * vringh_set_base_iotlb - set avail_idx and used_idx
> + * @vrh: the vring
> + * @idx: the value to set
> + */
> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx)
> +{
> +    return __vringh_set_base(vrh, idx, putu16_iotlb);
> +}
> +EXPORT_SYMBOL(vringh_set_base_iotlb);
> +
> +
>  /**
>   * vringh_getdesc_iotlb - get next available descriptor from ring with
>   * IOTLB.
> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> index c3a8117dabe8..e9b8af4e6a5e 100644
> --- a/include/linux/vringh.h
> +++ b/include/linux/vringh.h
> @@ -306,6 +306,8 @@ int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
>                          struct vring_avail *avail,
>                          struct vring_used *used);
>
> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx);
> +
>  int vringh_getdesc_iotlb(struct vringh *vrh,
>                          struct vringh_kiov *riov,
>                          struct vringh_kiov *wiov,
> --
> 2.39.3
>
  
Steven Sistare Oct. 30, 2023, 2:05 p.m. UTC | #2
On 10/27/2023 2:31 AM, Jason Wang wrote:
> On Fri, Oct 27, 2023 at 4:07 AM Steve Sistare <steven.sistare@oracle.com> wrote:
>>
>> mlx5_vdpa does not preserve userland's view of vring base for the control
>> queue in the following sequence:
>>
>> ioctl VHOST_SET_VRING_BASE
>> ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
>>   mlx5_vdpa_set_status()
>>     setup_cvq_vring()
>>       vringh_init_iotlb()
>>         vringh_init_kern()
>>           vrh->last_avail_idx = 0;
>> ioctl VHOST_GET_VRING_BASE
>>
>> To fix, restore the value of cvq->vring.last_avail_idx after calling
>> vringh_init_iotlb.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>  drivers/vdpa/mlx5/net/mlx5_vnet.c |  7 ++++++-
>>  drivers/vhost/vringh.c            | 30 ++++++++++++++++++++++++++++++
>>  include/linux/vringh.h            |  2 ++
>>  3 files changed, 38 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 946488b8989f..f64758143115 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -2795,13 +2795,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>>         struct mlx5_control_vq *cvq = &mvdev->cvq;
>>         int err = 0;
>>
>> -       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>> +       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
>> +               u16 last_avail_idx = cvq->vring.last_avail_idx;
>> +
>>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>>                                         MLX5_CVQ_MAX_ENT, false,
>>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>>
>> +               if (!err)
>> +                       vringh_set_base_iotlb(&cvq->vring, last_avail_idx);
> 
> Btw, vringh_set_base_iotlb() deserves an independent patch and it
> seems it is not specific to IOTLB, 

Agreed on both.  I initially submitted the smallest change needed to show the problem.

so we probably need an indirection
> to have vringh_set_base() first.

Not sure what you mean.  I have defined:

    static inline int __vringh_set_base() ...

    int vringh_set_base_iotlb()
        return __vringh_set_base(vrh, idx, putu16_iotlb);

to which I would add:

    int vringh_set_base_user()
        return __vringh_set_base(vrh, idx, putu16_user);

    int vringh_set_base_kern()
        return __vringh_set_base(vrh, idx, putu16_kern;

all in the same patch.

The call site in mlx5_vnet.c would be a 2nd patch.

- Steve

> Or I wonder if it's better to just introduce a new parameter to
> vringh_init_iotlb()...

I considered that, but IMO the parameter list there is already large, and it would
be strange to add a parameter for the initial value of avail, but not for used, and
no one needs the latter.

- Steve

>> +       }
>>         return err;
>>  }
>>
>> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
>> index 7b8fd977f71c..799762c83007 100644
>> --- a/drivers/vhost/vringh.c
>> +++ b/drivers/vhost/vringh.c
>> @@ -595,6 +595,24 @@ static inline void __vringh_notify_disable(struct vringh *vrh,
>>         }
>>  }
>>
>> +static inline int __vringh_set_base(struct vringh *vrh, u16 idx,
>> +                           int (*putu16)(const struct vringh *vrh,
>> +                               __virtio16 *p, u16 val))
>> +{
>> +    int ret;
>> +
>> +    ret = putu16(vrh, &vrh->vring.avail->idx, idx);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ret = putu16(vrh, &vrh->vring.used->idx, idx);
>> +    if (ret)
>> +        return ret;
>> +
>> +    vrh->last_avail_idx = vrh->last_used_idx = idx;
>> +    return 0;
>> +}
>> +
>>  /* Userspace access helpers: in this case, addresses are really userspace. */
>>  static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
>>  {
>> @@ -1456,6 +1474,18 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>>  }
>>  EXPORT_SYMBOL(vringh_set_iotlb);
>>
>> +/**
>> + * vringh_set_base_iotlb - set avail_idx and used_idx
>> + * @vrh: the vring
>> + * @idx: the value to set
>> + */
>> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx)
>> +{
>> +    return __vringh_set_base(vrh, idx, putu16_iotlb);
>> +}
>> +EXPORT_SYMBOL(vringh_set_base_iotlb);
>> +
>> +
>>  /**
>>   * vringh_getdesc_iotlb - get next available descriptor from ring with
>>   * IOTLB.
>> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
>> index c3a8117dabe8..e9b8af4e6a5e 100644
>> --- a/include/linux/vringh.h
>> +++ b/include/linux/vringh.h
>> @@ -306,6 +306,8 @@ int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
>>                          struct vring_avail *avail,
>>                          struct vring_used *used);
>>
>> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx);
>> +
>>  int vringh_getdesc_iotlb(struct vringh *vrh,
>>                          struct vringh_kiov *riov,
>>                          struct vringh_kiov *wiov,
>> --
>> 2.39.3
>>
>
  
Eugenio Perez Martin Oct. 30, 2023, 3:12 p.m. UTC | #3
On Thu, Oct 26, 2023 at 10:09 PM Steve Sistare
<steven.sistare@oracle.com> wrote:
>
> mlx5_vdpa does not preserve userland's view of vring base for the control
> queue in the following sequence:
>
> ioctl VHOST_SET_VRING_BASE
> ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
>   mlx5_vdpa_set_status()
>     setup_cvq_vring()
>       vringh_init_iotlb()
>         vringh_init_kern()
>           vrh->last_avail_idx = 0;
> ioctl VHOST_GET_VRING_BASE
>
> To fix, restore the value of cvq->vring.last_avail_idx after calling
> vringh_init_iotlb.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c |  7 ++++++-
>  drivers/vhost/vringh.c            | 30 ++++++++++++++++++++++++++++++
>  include/linux/vringh.h            |  2 ++
>  3 files changed, 38 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 946488b8989f..f64758143115 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2795,13 +2795,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>         struct mlx5_control_vq *cvq = &mvdev->cvq;
>         int err = 0;
>
> -       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
> +       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> +               u16 last_avail_idx = cvq->vring.last_avail_idx;
> +
>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>                                         MLX5_CVQ_MAX_ENT, false,
>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>
> +               if (!err)
> +                       vringh_set_base_iotlb(&cvq->vring, last_avail_idx);
> +       }
>         return err;
>  }
>
> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> index 7b8fd977f71c..799762c83007 100644
> --- a/drivers/vhost/vringh.c
> +++ b/drivers/vhost/vringh.c
> @@ -595,6 +595,24 @@ static inline void __vringh_notify_disable(struct vringh *vrh,
>         }
>  }
>
> +static inline int __vringh_set_base(struct vringh *vrh, u16 idx,
> +                           int (*putu16)(const struct vringh *vrh,
> +                               __virtio16 *p, u16 val))
> +{
> +    int ret;
> +
> +    ret = putu16(vrh, &vrh->vring.avail->idx, idx);
> +    if (ret)
> +        return ret;
> +
> +    ret = putu16(vrh, &vrh->vring.used->idx, idx);
> +    if (ret)
> +        return ret;
> +

I don't think VMM should be able to modify the guest's vring memory.
For vringh it should be enough with the next line, no need for
previous.

If I'm not wrong this was solved in the simulator by [1] and [2]. Am I
missing something?

Thanks!

[1] https://lkml.org/lkml/2023/1/18/1045
[2] https://www.spinics.net/lists/kernel/msg4705724.html

> +    vrh->last_avail_idx = vrh->last_used_idx = idx;
> +    return 0;
> +}
> +
>  /* Userspace access helpers: in this case, addresses are really userspace. */
>  static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
>  {
> @@ -1456,6 +1474,18 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>  }
>  EXPORT_SYMBOL(vringh_set_iotlb);
>
> +/**
> + * vringh_set_base_iotlb - set avail_idx and used_idx
> + * @vrh: the vring
> + * @idx: the value to set
> + */
> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx)
> +{
> +    return __vringh_set_base(vrh, idx, putu16_iotlb);
> +}
> +EXPORT_SYMBOL(vringh_set_base_iotlb);
> +
> +
>  /**
>   * vringh_getdesc_iotlb - get next available descriptor from ring with
>   * IOTLB.
> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> index c3a8117dabe8..e9b8af4e6a5e 100644
> --- a/include/linux/vringh.h
> +++ b/include/linux/vringh.h
> @@ -306,6 +306,8 @@ int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
>                          struct vring_avail *avail,
>                          struct vring_used *used);
>
> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx);
> +
>  int vringh_getdesc_iotlb(struct vringh *vrh,
>                          struct vringh_kiov *riov,
>                          struct vringh_kiov *wiov,
> --
> 2.39.3
>
  
Steven Sistare Oct. 30, 2023, 5:55 p.m. UTC | #4
On 10/30/2023 11:12 AM, Eugenio Perez Martin wrote:
> On Thu, Oct 26, 2023 at 10:09 PM Steve Sistare
> <steven.sistare@oracle.com> wrote:
>>
>> mlx5_vdpa does not preserve userland's view of vring base for the control
>> queue in the following sequence:
>>
>> ioctl VHOST_SET_VRING_BASE
>> ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
>>   mlx5_vdpa_set_status()
>>     setup_cvq_vring()
>>       vringh_init_iotlb()
>>         vringh_init_kern()
>>           vrh->last_avail_idx = 0;
>> ioctl VHOST_GET_VRING_BASE
>>
>> To fix, restore the value of cvq->vring.last_avail_idx after calling
>> vringh_init_iotlb.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>  drivers/vdpa/mlx5/net/mlx5_vnet.c |  7 ++++++-
>>  drivers/vhost/vringh.c            | 30 ++++++++++++++++++++++++++++++
>>  include/linux/vringh.h            |  2 ++
>>  3 files changed, 38 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> index 946488b8989f..f64758143115 100644
>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
>> @@ -2795,13 +2795,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
>>         struct mlx5_control_vq *cvq = &mvdev->cvq;
>>         int err = 0;
>>
>> -       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
>> +       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
>> +               u16 last_avail_idx = cvq->vring.last_avail_idx;
>> +
>>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
>>                                         MLX5_CVQ_MAX_ENT, false,
>>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
>>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
>>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
>>
>> +               if (!err)
>> +                       vringh_set_base_iotlb(&cvq->vring, last_avail_idx);
>> +       }
>>         return err;
>>  }
>>
>> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
>> index 7b8fd977f71c..799762c83007 100644
>> --- a/drivers/vhost/vringh.c
>> +++ b/drivers/vhost/vringh.c
>> @@ -595,6 +595,24 @@ static inline void __vringh_notify_disable(struct vringh *vrh,
>>         }
>>  }
>>
>> +static inline int __vringh_set_base(struct vringh *vrh, u16 idx,
>> +                           int (*putu16)(const struct vringh *vrh,
>> +                               __virtio16 *p, u16 val))
>> +{
>> +    int ret;
>> +
>> +    ret = putu16(vrh, &vrh->vring.avail->idx, idx);
>> +    if (ret)
>> +        return ret;
>> +
>> +    ret = putu16(vrh, &vrh->vring.used->idx, idx);
>> +    if (ret)
>> +        return ret;
>> +
> 
> I don't think VMM should be able to modify the guest's vring memory.
> For vringh it should be enough with the next line, no need for
> previous.
> 
> If I'm not wrong this was solved in the simulator by [1] and [2]. Am I
> missing something?
> 
> Thanks!
> 
> [1] https://lkml.org/lkml/2023/1/18/1045
> [2] https://www.spinics.net/lists/kernel/msg4705724.html
> 
>> +    vrh->last_avail_idx = vrh->last_used_idx = idx;
>> +    return 0;
>> +}
>> +

OK, that makes sense.  I just verified that the idx I pass to vringh_set_base_iotlb 
does indeed match vrh->vring.avail->idx and vrh->vring.used->idx.  

With no need to putu16, the fix could be confined to a few lines in the mlx5 driver:

setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) {
    idx = cvq->vring.last_avail_idx;
    vringh_init_iotlb(&cvq->vring, ...);
    cvq->vring.last_avail_idx =  cvq->vring.last_used_idx = idx;
}

However, I wonder if re-syncing with the guest values would be a more robust fix:

setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) {
    vringh_init_iotlb(&cvq->vring, ...);
    vringh_sync_iotlb(&cvq->vring);
}

vringh_sync_iotlb(struct vringh *vrh) {
    getu16_iotlb(vrh, &vrh->last_avail_idx, &vrh->vring.avail->idx);
    getu16_iotlb(vrh, &vrh->last_used_idx, &vrh->vring.used->idx);
}

- Steve

>>  /* Userspace access helpers: in this case, addresses are really userspace. */
>>  static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
>>  {
>> @@ -1456,6 +1474,18 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
>>  }
>>  EXPORT_SYMBOL(vringh_set_iotlb);
>>
>> +/**
>> + * vringh_set_base_iotlb - set avail_idx and used_idx
>> + * @vrh: the vring
>> + * @idx: the value to set
>> + */
>> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx)
>> +{
>> +    return __vringh_set_base(vrh, idx, putu16_iotlb);
>> +}
>> +EXPORT_SYMBOL(vringh_set_base_iotlb);
>> +
>> +
>>  /**
>>   * vringh_getdesc_iotlb - get next available descriptor from ring with
>>   * IOTLB.
>> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
>> index c3a8117dabe8..e9b8af4e6a5e 100644
>> --- a/include/linux/vringh.h
>> +++ b/include/linux/vringh.h
>> @@ -306,6 +306,8 @@ int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
>>                          struct vring_avail *avail,
>>                          struct vring_used *used);
>>
>> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx);
>> +
>>  int vringh_getdesc_iotlb(struct vringh *vrh,
>>                          struct vringh_kiov *riov,
>>                          struct vringh_kiov *wiov,
>> --
>> 2.39.3
>>
>
  
Eugenio Perez Martin Oct. 30, 2023, 6:28 p.m. UTC | #5
On Mon, Oct 30, 2023 at 6:56 PM Steven Sistare
<steven.sistare@oracle.com> wrote:
>
> On 10/30/2023 11:12 AM, Eugenio Perez Martin wrote:
> > On Thu, Oct 26, 2023 at 10:09 PM Steve Sistare
> > <steven.sistare@oracle.com> wrote:
> >>
> >> mlx5_vdpa does not preserve userland's view of vring base for the control
> >> queue in the following sequence:
> >>
> >> ioctl VHOST_SET_VRING_BASE
> >> ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
> >>   mlx5_vdpa_set_status()
> >>     setup_cvq_vring()
> >>       vringh_init_iotlb()
> >>         vringh_init_kern()
> >>           vrh->last_avail_idx = 0;
> >> ioctl VHOST_GET_VRING_BASE
> >>
> >> To fix, restore the value of cvq->vring.last_avail_idx after calling
> >> vringh_init_iotlb.
> >>
> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> >> ---
> >>  drivers/vdpa/mlx5/net/mlx5_vnet.c |  7 ++++++-
> >>  drivers/vhost/vringh.c            | 30 ++++++++++++++++++++++++++++++
> >>  include/linux/vringh.h            |  2 ++
> >>  3 files changed, 38 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> index 946488b8989f..f64758143115 100644
> >> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> @@ -2795,13 +2795,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
> >>         struct mlx5_control_vq *cvq = &mvdev->cvq;
> >>         int err = 0;
> >>
> >> -       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
> >> +       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> >> +               u16 last_avail_idx = cvq->vring.last_avail_idx;
> >> +
> >>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> >>                                         MLX5_CVQ_MAX_ENT, false,
> >>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
> >>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
> >>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
> >>
> >> +               if (!err)
> >> +                       vringh_set_base_iotlb(&cvq->vring, last_avail_idx);
> >> +       }
> >>         return err;
> >>  }
> >>
> >> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> >> index 7b8fd977f71c..799762c83007 100644
> >> --- a/drivers/vhost/vringh.c
> >> +++ b/drivers/vhost/vringh.c
> >> @@ -595,6 +595,24 @@ static inline void __vringh_notify_disable(struct vringh *vrh,
> >>         }
> >>  }
> >>
> >> +static inline int __vringh_set_base(struct vringh *vrh, u16 idx,
> >> +                           int (*putu16)(const struct vringh *vrh,
> >> +                               __virtio16 *p, u16 val))
> >> +{
> >> +    int ret;
> >> +
> >> +    ret = putu16(vrh, &vrh->vring.avail->idx, idx);
> >> +    if (ret)
> >> +        return ret;
> >> +
> >> +    ret = putu16(vrh, &vrh->vring.used->idx, idx);
> >> +    if (ret)
> >> +        return ret;
> >> +
> >
> > I don't think VMM should be able to modify the guest's vring memory.
> > For vringh it should be enough with the next line, no need for
> > previous.
> >
> > If I'm not wrong this was solved in the simulator by [1] and [2]. Am I
> > missing something?
> >
> > Thanks!
> >
> > [1] https://lkml.org/lkml/2023/1/18/1045
> > [2] https://www.spinics.net/lists/kernel/msg4705724.html
> >
> >> +    vrh->last_avail_idx = vrh->last_used_idx = idx;
> >> +    return 0;
> >> +}
> >> +
>
> OK, that makes sense.  I just verified that the idx I pass to vringh_set_base_iotlb
> does indeed match vrh->vring.avail->idx and vrh->vring.used->idx.
>
> With no need to putu16, the fix could be confined to a few lines in the mlx5 driver:
>
> setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) {
>     idx = cvq->vring.last_avail_idx;
>     vringh_init_iotlb(&cvq->vring, ...);
>     cvq->vring.last_avail_idx =  cvq->vring.last_used_idx = idx;
> }
>
> However, I wonder if re-syncing with the guest values would be a more robust fix:
>
> setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) {
>     vringh_init_iotlb(&cvq->vring, ...);
>     vringh_sync_iotlb(&cvq->vring);
> }
>
> vringh_sync_iotlb(struct vringh *vrh) {
>     getu16_iotlb(vrh, &vrh->last_avail_idx, &vrh->vring.avail->idx);

This is not valid. For example in the case of the net RX queue. The
guest fills it periodically with rx buffers, but the next index to use
is not avail_index but the index that vhost handles internally.

If any, it *could* be valid to restore both of them from used_idx,
since we trust the source to sync with the used ring. But this will
not be valid once we migrate inflight descriptors too. If parent
drivers start just restoring used idx for both, we will need to
complicate behavior of the feature flag that indicates the vDPA device
supports in-flight.

>     getu16_iotlb(vrh, &vrh->last_used_idx, &vrh->vring.used->idx);
> }
>
> - Steve
>
> >>  /* Userspace access helpers: in this case, addresses are really userspace. */
> >>  static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
> >>  {
> >> @@ -1456,6 +1474,18 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
> >>  }
> >>  EXPORT_SYMBOL(vringh_set_iotlb);
> >>
> >> +/**
> >> + * vringh_set_base_iotlb - set avail_idx and used_idx
> >> + * @vrh: the vring
> >> + * @idx: the value to set
> >> + */
> >> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx)
> >> +{
> >> +    return __vringh_set_base(vrh, idx, putu16_iotlb);
> >> +}
> >> +EXPORT_SYMBOL(vringh_set_base_iotlb);
> >> +
> >> +
> >>  /**
> >>   * vringh_getdesc_iotlb - get next available descriptor from ring with
> >>   * IOTLB.
> >> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> >> index c3a8117dabe8..e9b8af4e6a5e 100644
> >> --- a/include/linux/vringh.h
> >> +++ b/include/linux/vringh.h
> >> @@ -306,6 +306,8 @@ int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
> >>                          struct vring_avail *avail,
> >>                          struct vring_used *used);
> >>
> >> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx);
> >> +
> >>  int vringh_getdesc_iotlb(struct vringh *vrh,
> >>                          struct vringh_kiov *riov,
> >>                          struct vringh_kiov *wiov,
> >> --
> >> 2.39.3
> >>
> >
>
  
Jason Wang Oct. 31, 2023, 1:25 a.m. UTC | #6
On Mon, Oct 30, 2023 at 10:06 PM Steven Sistare
<steven.sistare@oracle.com> wrote:
>
> On 10/27/2023 2:31 AM, Jason Wang wrote:
> > On Fri, Oct 27, 2023 at 4:07 AM Steve Sistare <steven.sistare@oracle.com> wrote:
> >>
> >> mlx5_vdpa does not preserve userland's view of vring base for the control
> >> queue in the following sequence:
> >>
> >> ioctl VHOST_SET_VRING_BASE
> >> ioctl VHOST_VDPA_SET_STATUS VIRTIO_CONFIG_S_DRIVER_OK
> >>   mlx5_vdpa_set_status()
> >>     setup_cvq_vring()
> >>       vringh_init_iotlb()
> >>         vringh_init_kern()
> >>           vrh->last_avail_idx = 0;
> >> ioctl VHOST_GET_VRING_BASE
> >>
> >> To fix, restore the value of cvq->vring.last_avail_idx after calling
> >> vringh_init_iotlb.
> >>
> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> >> ---
> >>  drivers/vdpa/mlx5/net/mlx5_vnet.c |  7 ++++++-
> >>  drivers/vhost/vringh.c            | 30 ++++++++++++++++++++++++++++++
> >>  include/linux/vringh.h            |  2 ++
> >>  3 files changed, 38 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> index 946488b8989f..f64758143115 100644
> >> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> @@ -2795,13 +2795,18 @@ static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
> >>         struct mlx5_control_vq *cvq = &mvdev->cvq;
> >>         int err = 0;
> >>
> >> -       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
> >> +       if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
> >> +               u16 last_avail_idx = cvq->vring.last_avail_idx;
> >> +
> >>                 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
> >>                                         MLX5_CVQ_MAX_ENT, false,
> >>                                         (struct vring_desc *)(uintptr_t)cvq->desc_addr,
> >>                                         (struct vring_avail *)(uintptr_t)cvq->driver_addr,
> >>                                         (struct vring_used *)(uintptr_t)cvq->device_addr);
> >>
> >> +               if (!err)
> >> +                       vringh_set_base_iotlb(&cvq->vring, last_avail_idx);
> >
> > Btw, vringh_set_base_iotlb() deserves an independent patch and it
> > seems it is not specific to IOTLB,
>
> Agreed on both.  I initially submitted the smallest change needed to show the problem.
>
> so we probably need an indirection
> > to have vringh_set_base() first.
>
> Not sure what you mean.  I have defined:
>
>     static inline int __vringh_set_base() ...
>
>     int vringh_set_base_iotlb()
>         return __vringh_set_base(vrh, idx, putu16_iotlb);
>
> to which I would add:
>
>     int vringh_set_base_user()
>         return __vringh_set_base(vrh, idx, putu16_user);
>
>     int vringh_set_base_kern()
>         return __vringh_set_base(vrh, idx, putu16_kern;
>
> all in the same patch.
>
> The call site in mlx5_vnet.c would be a 2nd patch.

Right, so we just need to split it.

>
> - Steve
>
> > Or I wonder if it's better to just introduce a new parameter to
> > vringh_init_iotlb()...
>
> I considered that, but IMO the parameter list there is already large, and it would
> be strange to add a parameter for the initial value of avail, but not for used, and
> no one needs the latter.

Fine.

Thanks

>
> - Steve
>
> >> +       }
> >>         return err;
> >>  }
> >>
> >> diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
> >> index 7b8fd977f71c..799762c83007 100644
> >> --- a/drivers/vhost/vringh.c
> >> +++ b/drivers/vhost/vringh.c
> >> @@ -595,6 +595,24 @@ static inline void __vringh_notify_disable(struct vringh *vrh,
> >>         }
> >>  }
> >>
> >> +static inline int __vringh_set_base(struct vringh *vrh, u16 idx,
> >> +                           int (*putu16)(const struct vringh *vrh,
> >> +                               __virtio16 *p, u16 val))
> >> +{
> >> +    int ret;
> >> +
> >> +    ret = putu16(vrh, &vrh->vring.avail->idx, idx);
> >> +    if (ret)
> >> +        return ret;
> >> +
> >> +    ret = putu16(vrh, &vrh->vring.used->idx, idx);
> >> +    if (ret)
> >> +        return ret;
> >> +
> >> +    vrh->last_avail_idx = vrh->last_used_idx = idx;
> >> +    return 0;
> >> +}
> >> +
> >>  /* Userspace access helpers: in this case, addresses are really userspace. */
> >>  static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
> >>  {
> >> @@ -1456,6 +1474,18 @@ void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
> >>  }
> >>  EXPORT_SYMBOL(vringh_set_iotlb);
> >>
> >> +/**
> >> + * vringh_set_base_iotlb - set avail_idx and used_idx
> >> + * @vrh: the vring
> >> + * @idx: the value to set
> >> + */
> >> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx)
> >> +{
> >> +    return __vringh_set_base(vrh, idx, putu16_iotlb);
> >> +}
> >> +EXPORT_SYMBOL(vringh_set_base_iotlb);
> >> +
> >> +
> >>  /**
> >>   * vringh_getdesc_iotlb - get next available descriptor from ring with
> >>   * IOTLB.
> >> diff --git a/include/linux/vringh.h b/include/linux/vringh.h
> >> index c3a8117dabe8..e9b8af4e6a5e 100644
> >> --- a/include/linux/vringh.h
> >> +++ b/include/linux/vringh.h
> >> @@ -306,6 +306,8 @@ int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
> >>                          struct vring_avail *avail,
> >>                          struct vring_used *used);
> >>
> >> +int vringh_set_base_iotlb(struct vringh *vrh, u16 idx);
> >> +
> >>  int vringh_getdesc_iotlb(struct vringh *vrh,
> >>                          struct vringh_kiov *riov,
> >>                          struct vringh_kiov *wiov,
> >> --
> >> 2.39.3
> >>
> >
>
  

Patch

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 946488b8989f..f64758143115 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2795,13 +2795,18 @@  static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
 	struct mlx5_control_vq *cvq = &mvdev->cvq;
 	int err = 0;
 
-	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
+	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
+		u16 last_avail_idx = cvq->vring.last_avail_idx;
+
 		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
 					MLX5_CVQ_MAX_ENT, false,
 					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
 					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
 					(struct vring_used *)(uintptr_t)cvq->device_addr);
 
+		if (!err)
+			vringh_set_base_iotlb(&cvq->vring, last_avail_idx);
+	}
 	return err;
 }
 
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 7b8fd977f71c..799762c83007 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -595,6 +595,24 @@  static inline void __vringh_notify_disable(struct vringh *vrh,
 	}
 }
 
+static inline int __vringh_set_base(struct vringh *vrh, u16 idx,
+	                    int (*putu16)(const struct vringh *vrh,
+	                        __virtio16 *p, u16 val))
+{
+    int ret;
+
+    ret = putu16(vrh, &vrh->vring.avail->idx, idx);
+    if (ret)
+        return ret;
+
+    ret = putu16(vrh, &vrh->vring.used->idx, idx);
+    if (ret)
+        return ret;
+
+    vrh->last_avail_idx = vrh->last_used_idx = idx;
+    return 0;
+}
+
 /* Userspace access helpers: in this case, addresses are really userspace. */
 static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
 {
@@ -1456,6 +1474,18 @@  void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
 }
 EXPORT_SYMBOL(vringh_set_iotlb);
 
+/**
+ * vringh_set_base_iotlb - set avail_idx and used_idx
+ * @vrh: the vring
+ * @idx: the value to set
+ */
+int vringh_set_base_iotlb(struct vringh *vrh, u16 idx)
+{
+    return __vringh_set_base(vrh, idx, putu16_iotlb);
+}
+EXPORT_SYMBOL(vringh_set_base_iotlb);
+
+
 /**
  * vringh_getdesc_iotlb - get next available descriptor from ring with
  * IOTLB.
diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index c3a8117dabe8..e9b8af4e6a5e 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h
@@ -306,6 +306,8 @@  int vringh_init_iotlb_va(struct vringh *vrh, u64 features,
 			 struct vring_avail *avail,
 			 struct vring_used *used);
 
+int vringh_set_base_iotlb(struct vringh *vrh, u16 idx);
+
 int vringh_getdesc_iotlb(struct vringh *vrh,
 			 struct vringh_kiov *riov,
 			 struct vringh_kiov *wiov,