[RFC,V1,05/13] vhost-vdpa: VHOST_IOTLB_REMAP

Message ID 1704919215-91319-6-git-send-email-steven.sistare@oracle.com
State New
Headers
Series vdpa live update |

Commit Message

Steven Sistare Jan. 10, 2024, 8:40 p.m. UTC
  When device ownership is passed to a new process via VHOST_NEW_OWNER,
some devices need to know the new userland addresses of the dma mappings.
Define the new iotlb message type VHOST_IOTLB_REMAP to update the uaddr
of a mapping.  The new uaddr must address the same memory object as
originally mapped.

The user must suspend the device before the old address is invalidated,
and cannot resume it until after VHOST_IOTLB_REMAP is called, but this
requirement is not enforced by the API.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 drivers/vhost/vdpa.c             | 34 ++++++++++++++++++++++++++++++++
 include/uapi/linux/vhost_types.h | 11 ++++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)
  

Comments

Jason Wang Jan. 11, 2024, 3:08 a.m. UTC | #1
On Thu, Jan 11, 2024 at 4:40 AM Steve Sistare <steven.sistare@oracle.com> wrote:
>
> When device ownership is passed to a new process via VHOST_NEW_OWNER,
> some devices need to know the new userland addresses of the dma mappings.
> Define the new iotlb message type VHOST_IOTLB_REMAP to update the uaddr
> of a mapping.  The new uaddr must address the same memory object as
> originally mapped.
>
> The user must suspend the device before the old address is invalidated,
> and cannot resume it until after VHOST_IOTLB_REMAP is called, but this
> requirement is not enforced by the API.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  drivers/vhost/vdpa.c             | 34 ++++++++++++++++++++++++++++++++
>  include/uapi/linux/vhost_types.h | 11 ++++++++++-
>  2 files changed, 44 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index faed6471934a..ec5ca20bd47d 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -1219,6 +1219,37 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
>
>  }
>
> +static int vhost_vdpa_process_iotlb_remap(struct vhost_vdpa *v,
> +                                         struct vhost_iotlb *iotlb,
> +                                         struct vhost_iotlb_msg *msg)
> +{
> +       struct vdpa_device *vdpa = v->vdpa;
> +       const struct vdpa_config_ops *ops = vdpa->config;
> +       u32 asid = iotlb_to_asid(iotlb);
> +       u64 start = msg->iova;
> +       u64 last = start + msg->size - 1;
> +       struct vhost_iotlb_map *map;
> +       int r = 0;
> +
> +       if (msg->perm || !msg->size)
> +               return -EINVAL;
> +
> +       map = vhost_iotlb_itree_first(iotlb, start, last);
> +       if (!map)
> +               return -ENOENT;
> +
> +       if (map->start != start || map->last != last)
> +               return -EINVAL;
> +
> +       /* batch will finish with remap.  non-batch must do it now. */
> +       if (!v->in_batch)
> +               r = ops->set_map(vdpa, asid, iotlb);
> +       if (!r)
> +               map->addr = msg->uaddr;

I may miss something, for example for PA mapping,

1) need to convert uaddr into phys addr
2) need to check whether the uaddr is backed by the same page or not?

Thanks

> +
> +       return r;
> +}
> +
>  static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>                                            struct vhost_iotlb *iotlb,
>                                            struct vhost_iotlb_msg *msg)
> @@ -1298,6 +1329,9 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
>                         ops->set_map(vdpa, asid, iotlb);
>                 v->in_batch = false;
>                 break;
> +       case VHOST_IOTLB_REMAP:
> +               r = vhost_vdpa_process_iotlb_remap(v, iotlb, msg);
> +               break;
>         default:
>                 r = -EINVAL;
>                 break;
> diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
> index 9177843951e9..35908315ff55 100644
> --- a/include/uapi/linux/vhost_types.h
> +++ b/include/uapi/linux/vhost_types.h
> @@ -79,7 +79,7 @@ struct vhost_iotlb_msg {
>  /*
>   * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying
>   * multiple mappings in one go: beginning with
> - * VHOST_IOTLB_BATCH_BEGIN, followed by any number of
> + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of VHOST_IOTLB_REMAP or
>   * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END.
>   * When one of these two values is used as the message type, the rest
>   * of the fields in the message are ignored. There's no guarantee that
> @@ -87,6 +87,15 @@ struct vhost_iotlb_msg {
>   */
>  #define VHOST_IOTLB_BATCH_BEGIN    5
>  #define VHOST_IOTLB_BATCH_END      6
> +
> +/*
> + * VHOST_IOTLB_REMAP registers a new uaddr for the existing mapping at iova.
> + * The new uaddr must address the same memory object as originally mapped.
> + * Failure to do so will result in user memory corruption and/or device
> + * misbehavior.  iova and size must match the arguments used to create the
> + * an existing mapping.  Protection is not changed, and perm must be 0.
> + */
> +#define VHOST_IOTLB_REMAP          7
>         __u8 type;
>  };
>
> --
> 2.39.3
>
  
Eugenio Perez Martin Jan. 16, 2024, 6:14 p.m. UTC | #2
On Wed, Jan 10, 2024 at 9:40 PM Steve Sistare <steven.sistare@oracle.com> wrote:
>
> When device ownership is passed to a new process via VHOST_NEW_OWNER,
> some devices need to know the new userland addresses of the dma mappings.
> Define the new iotlb message type VHOST_IOTLB_REMAP to update the uaddr
> of a mapping.  The new uaddr must address the same memory object as
> originally mapped.
>

I think this new ioctl is very interesting, and can be used to
optimize some parts of live migration with shadow virtqueue if it
allows to actually replace the uaddr. Would you be interested in that
capability?

> The user must suspend the device before the old address is invalidated,
> and cannot resume it until after VHOST_IOTLB_REMAP is called, but this
> requirement is not enforced by the API.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  drivers/vhost/vdpa.c             | 34 ++++++++++++++++++++++++++++++++
>  include/uapi/linux/vhost_types.h | 11 ++++++++++-
>  2 files changed, 44 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index faed6471934a..ec5ca20bd47d 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -1219,6 +1219,37 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
>
>  }
>
> +static int vhost_vdpa_process_iotlb_remap(struct vhost_vdpa *v,
> +                                         struct vhost_iotlb *iotlb,
> +                                         struct vhost_iotlb_msg *msg)
> +{
> +       struct vdpa_device *vdpa = v->vdpa;
> +       const struct vdpa_config_ops *ops = vdpa->config;
> +       u32 asid = iotlb_to_asid(iotlb);
> +       u64 start = msg->iova;
> +       u64 last = start + msg->size - 1;
> +       struct vhost_iotlb_map *map;
> +       int r = 0;
> +
> +       if (msg->perm || !msg->size)
> +               return -EINVAL;
> +
> +       map = vhost_iotlb_itree_first(iotlb, start, last);
> +       if (!map)
> +               return -ENOENT;
> +
> +       if (map->start != start || map->last != last)
> +               return -EINVAL;
> +
> +       /* batch will finish with remap.  non-batch must do it now. */
> +       if (!v->in_batch)
> +               r = ops->set_map(vdpa, asid, iotlb);

I'm missing how these cases are handled:
* The device does not expose set_map but dma_map / dma_unmap (you can
check this case with the simulator)
* The device uses platform iommu (!set_map && !dma_unmap vdpa_ops).

> +       if (!r)
> +               map->addr = msg->uaddr;
> +
> +       return r;
> +}
> +
>  static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>                                            struct vhost_iotlb *iotlb,
>                                            struct vhost_iotlb_msg *msg)
> @@ -1298,6 +1329,9 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
>                         ops->set_map(vdpa, asid, iotlb);
>                 v->in_batch = false;
>                 break;
> +       case VHOST_IOTLB_REMAP:
> +               r = vhost_vdpa_process_iotlb_remap(v, iotlb, msg);
> +               break;
>         default:
>                 r = -EINVAL;
>                 break;
> diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
> index 9177843951e9..35908315ff55 100644
> --- a/include/uapi/linux/vhost_types.h
> +++ b/include/uapi/linux/vhost_types.h
> @@ -79,7 +79,7 @@ struct vhost_iotlb_msg {
>  /*
>   * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying
>   * multiple mappings in one go: beginning with
> - * VHOST_IOTLB_BATCH_BEGIN, followed by any number of
> + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of VHOST_IOTLB_REMAP or
>   * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END.
>   * When one of these two values is used as the message type, the rest
>   * of the fields in the message are ignored. There's no guarantee that
> @@ -87,6 +87,15 @@ struct vhost_iotlb_msg {
>   */
>  #define VHOST_IOTLB_BATCH_BEGIN    5
>  #define VHOST_IOTLB_BATCH_END      6
> +
> +/*
> + * VHOST_IOTLB_REMAP registers a new uaddr for the existing mapping at iova.
> + * The new uaddr must address the same memory object as originally mapped.
> + * Failure to do so will result in user memory corruption and/or device
> + * misbehavior.  iova and size must match the arguments used to create the
> + * an existing mapping.  Protection is not changed, and perm must be 0.
> + */
> +#define VHOST_IOTLB_REMAP          7
>         __u8 type;
>  };
>
> --
> 2.39.3
>
  
Steven Sistare Jan. 17, 2024, 8:31 p.m. UTC | #3
On 1/10/2024 10:08 PM, Jason Wang wrote:
> On Thu, Jan 11, 2024 at 4:40 AM Steve Sistare <steven.sistare@oracle.com> wrote:
>>
>> When device ownership is passed to a new process via VHOST_NEW_OWNER,
>> some devices need to know the new userland addresses of the dma mappings.
>> Define the new iotlb message type VHOST_IOTLB_REMAP to update the uaddr
>> of a mapping.  The new uaddr must address the same memory object as
>> originally mapped.
>>
>> The user must suspend the device before the old address is invalidated,
>> and cannot resume it until after VHOST_IOTLB_REMAP is called, but this
>> requirement is not enforced by the API.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>  drivers/vhost/vdpa.c             | 34 ++++++++++++++++++++++++++++++++
>>  include/uapi/linux/vhost_types.h | 11 ++++++++++-
>>  2 files changed, 44 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index faed6471934a..ec5ca20bd47d 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -1219,6 +1219,37 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
>>
>>  }
>>
>> +static int vhost_vdpa_process_iotlb_remap(struct vhost_vdpa *v,
>> +                                         struct vhost_iotlb *iotlb,
>> +                                         struct vhost_iotlb_msg *msg)
>> +{
>> +       struct vdpa_device *vdpa = v->vdpa;
>> +       const struct vdpa_config_ops *ops = vdpa->config;
>> +       u32 asid = iotlb_to_asid(iotlb);
>> +       u64 start = msg->iova;
>> +       u64 last = start + msg->size - 1;
>> +       struct vhost_iotlb_map *map;
>> +       int r = 0;
>> +
>> +       if (msg->perm || !msg->size)
>> +               return -EINVAL;
>> +
>> +       map = vhost_iotlb_itree_first(iotlb, start, last);
>> +       if (!map)
>> +               return -ENOENT;
>> +
>> +       if (map->start != start || map->last != last)
>> +               return -EINVAL;
>> +
>> +       /* batch will finish with remap.  non-batch must do it now. */
>> +       if (!v->in_batch)
>> +               r = ops->set_map(vdpa, asid, iotlb);
>> +       if (!r)
>> +               map->addr = msg->uaddr;
> 
> I may miss something, for example for PA mapping,
> 
> 1) need to convert uaddr into phys addr
> 2) need to check whether the uaddr is backed by the same page or not?

This code does not verify that the new size@uaddr points to the same physical
pages as the old size@uaddr.  If the app screws up and they differ, then the app
may corrupt its own memory, but no-one else's.

It would be expensive for large memories to verify page by page, O(npages), and such
verification lies on the critical path for virtual machine downtime during live update.
I could compare the properties of the vma(s) for the old size@uaddr vs the vma for the 
new, but that is more complicated and would be a maintenance headache.  When I submitted
such code to Alex W when writing the equivalent patches for vfio, he said don't check,
correctness is the user's responsibility.

- Steve

>> +
>> +       return r;
>> +}
>> +
>>  static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>>                                            struct vhost_iotlb *iotlb,
>>                                            struct vhost_iotlb_msg *msg)
>> @@ -1298,6 +1329,9 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
>>                         ops->set_map(vdpa, asid, iotlb);
>>                 v->in_batch = false;
>>                 break;
>> +       case VHOST_IOTLB_REMAP:
>> +               r = vhost_vdpa_process_iotlb_remap(v, iotlb, msg);
>> +               break;
>>         default:
>>                 r = -EINVAL;
>>                 break;
>> diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
>> index 9177843951e9..35908315ff55 100644
>> --- a/include/uapi/linux/vhost_types.h
>> +++ b/include/uapi/linux/vhost_types.h
>> @@ -79,7 +79,7 @@ struct vhost_iotlb_msg {
>>  /*
>>   * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying
>>   * multiple mappings in one go: beginning with
>> - * VHOST_IOTLB_BATCH_BEGIN, followed by any number of
>> + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of VHOST_IOTLB_REMAP or
>>   * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END.
>>   * When one of these two values is used as the message type, the rest
>>   * of the fields in the message are ignored. There's no guarantee that
>> @@ -87,6 +87,15 @@ struct vhost_iotlb_msg {
>>   */
>>  #define VHOST_IOTLB_BATCH_BEGIN    5
>>  #define VHOST_IOTLB_BATCH_END      6
>> +
>> +/*
>> + * VHOST_IOTLB_REMAP registers a new uaddr for the existing mapping at iova.
>> + * The new uaddr must address the same memory object as originally mapped.
>> + * Failure to do so will result in user memory corruption and/or device
>> + * misbehavior.  iova and size must match the arguments used to create the
>> + * an existing mapping.  Protection is not changed, and perm must be 0.
>> + */
>> +#define VHOST_IOTLB_REMAP          7
>>         __u8 type;
>>  };
>>
>> --
>> 2.39.3
>>
>
  
Jason Wang Jan. 22, 2024, 4:05 a.m. UTC | #4
On Thu, Jan 18, 2024 at 4:32 AM Steven Sistare
<steven.sistare@oracle.com> wrote:
>
> On 1/10/2024 10:08 PM, Jason Wang wrote:
> > On Thu, Jan 11, 2024 at 4:40 AM Steve Sistare <steven.sistare@oracle.com> wrote:
> >>
> >> When device ownership is passed to a new process via VHOST_NEW_OWNER,
> >> some devices need to know the new userland addresses of the dma mappings.
> >> Define the new iotlb message type VHOST_IOTLB_REMAP to update the uaddr
> >> of a mapping.  The new uaddr must address the same memory object as
> >> originally mapped.
> >>
> >> The user must suspend the device before the old address is invalidated,
> >> and cannot resume it until after VHOST_IOTLB_REMAP is called, but this
> >> requirement is not enforced by the API.
> >>
> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> >> ---
> >>  drivers/vhost/vdpa.c             | 34 ++++++++++++++++++++++++++++++++
> >>  include/uapi/linux/vhost_types.h | 11 ++++++++++-
> >>  2 files changed, 44 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >> index faed6471934a..ec5ca20bd47d 100644
> >> --- a/drivers/vhost/vdpa.c
> >> +++ b/drivers/vhost/vdpa.c
> >> @@ -1219,6 +1219,37 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
> >>
> >>  }
> >>
> >> +static int vhost_vdpa_process_iotlb_remap(struct vhost_vdpa *v,
> >> +                                         struct vhost_iotlb *iotlb,
> >> +                                         struct vhost_iotlb_msg *msg)
> >> +{
> >> +       struct vdpa_device *vdpa = v->vdpa;
> >> +       const struct vdpa_config_ops *ops = vdpa->config;
> >> +       u32 asid = iotlb_to_asid(iotlb);
> >> +       u64 start = msg->iova;
> >> +       u64 last = start + msg->size - 1;
> >> +       struct vhost_iotlb_map *map;
> >> +       int r = 0;
> >> +
> >> +       if (msg->perm || !msg->size)
> >> +               return -EINVAL;
> >> +
> >> +       map = vhost_iotlb_itree_first(iotlb, start, last);
> >> +       if (!map)
> >> +               return -ENOENT;
> >> +
> >> +       if (map->start != start || map->last != last)
> >> +               return -EINVAL;
> >> +
> >> +       /* batch will finish with remap.  non-batch must do it now. */
> >> +       if (!v->in_batch)
> >> +               r = ops->set_map(vdpa, asid, iotlb);
> >> +       if (!r)
> >> +               map->addr = msg->uaddr;
> >
> > I may miss something, for example for PA mapping,
> >
> > 1) need to convert uaddr into phys addr
> > 2) need to check whether the uaddr is backed by the same page or not?
>
> This code does not verify that the new size@uaddr points to the same physical
> pages as the old size@uaddr.  If the app screws up and they differ, then the app
> may corrupt its own memory, but no-one else's.
>
> It would be expensive for large memories to verify page by page, O(npages), and such
> verification lies on the critical path for virtual machine downtime during live update.
> I could compare the properties of the vma(s) for the old size@uaddr vs the vma for the
> new, but that is more complicated and would be a maintenance headache.  When I submitted
> such code to Alex W when writing the equivalent patches for vfio, he said don't check,
> correctness is the user's responsibility.

Ok, let's document this somewhere.

Thanks

>
> - Steve
>
> >> +
> >> +       return r;
> >> +}
> >> +
> >>  static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> >>                                            struct vhost_iotlb *iotlb,
> >>                                            struct vhost_iotlb_msg *msg)
> >> @@ -1298,6 +1329,9 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
> >>                         ops->set_map(vdpa, asid, iotlb);
> >>                 v->in_batch = false;
> >>                 break;
> >> +       case VHOST_IOTLB_REMAP:
> >> +               r = vhost_vdpa_process_iotlb_remap(v, iotlb, msg);
> >> +               break;
> >>         default:
> >>                 r = -EINVAL;
> >>                 break;
> >> diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
> >> index 9177843951e9..35908315ff55 100644
> >> --- a/include/uapi/linux/vhost_types.h
> >> +++ b/include/uapi/linux/vhost_types.h
> >> @@ -79,7 +79,7 @@ struct vhost_iotlb_msg {
> >>  /*
> >>   * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying
> >>   * multiple mappings in one go: beginning with
> >> - * VHOST_IOTLB_BATCH_BEGIN, followed by any number of
> >> + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of VHOST_IOTLB_REMAP or
> >>   * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END.
> >>   * When one of these two values is used as the message type, the rest
> >>   * of the fields in the message are ignored. There's no guarantee that
> >> @@ -87,6 +87,15 @@ struct vhost_iotlb_msg {
> >>   */
> >>  #define VHOST_IOTLB_BATCH_BEGIN    5
> >>  #define VHOST_IOTLB_BATCH_END      6
> >> +
> >> +/*
> >> + * VHOST_IOTLB_REMAP registers a new uaddr for the existing mapping at iova.
> >> + * The new uaddr must address the same memory object as originally mapped.
> >> + * Failure to do so will result in user memory corruption and/or device
> >> + * misbehavior.  iova and size must match the arguments used to create the
> >> + * an existing mapping.  Protection is not changed, and perm must be 0.
> >> + */
> >> +#define VHOST_IOTLB_REMAP          7
> >>         __u8 type;
> >>  };
> >>
> >> --
> >> 2.39.3
> >>
> >
>
  
Steven Sistare Feb. 9, 2024, 3:49 p.m. UTC | #5
On 1/16/2024 1:14 PM, Eugenio Perez Martin wrote:
> On Wed, Jan 10, 2024 at 9:40 PM Steve Sistare <steven.sistare@oracle.com> wrote:
>>
>> When device ownership is passed to a new process via VHOST_NEW_OWNER,
>> some devices need to know the new userland addresses of the dma mappings.
>> Define the new iotlb message type VHOST_IOTLB_REMAP to update the uaddr
>> of a mapping.  The new uaddr must address the same memory object as
>> originally mapped.
> 
> I think this new ioctl is very interesting, and can be used to
> optimize some parts of live migration with shadow virtqueue if it
> allows to actually replace the uaddr. Would you be interested in that
> capability?

Sure.  Please share your thoughts on how it would be used.  I don't follow,
because with live migration, you are creating a new vdpa instance with new
shadow queues on the destination, versus relocating old shadow queues (which 
we could do during live update which preserves the same vdpa instance).

(Sorry for the late response, I stashed this email and forgot to respond.)

>> The user must suspend the device before the old address is invalidated,
>> and cannot resume it until after VHOST_IOTLB_REMAP is called, but this
>> requirement is not enforced by the API.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>  drivers/vhost/vdpa.c             | 34 ++++++++++++++++++++++++++++++++
>>  include/uapi/linux/vhost_types.h | 11 ++++++++++-
>>  2 files changed, 44 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index faed6471934a..ec5ca20bd47d 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -1219,6 +1219,37 @@ static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
>>
>>  }
>>
>> +static int vhost_vdpa_process_iotlb_remap(struct vhost_vdpa *v,
>> +                                         struct vhost_iotlb *iotlb,
>> +                                         struct vhost_iotlb_msg *msg)
>> +{
>> +       struct vdpa_device *vdpa = v->vdpa;
>> +       const struct vdpa_config_ops *ops = vdpa->config;
>> +       u32 asid = iotlb_to_asid(iotlb);
>> +       u64 start = msg->iova;
>> +       u64 last = start + msg->size - 1;
>> +       struct vhost_iotlb_map *map;
>> +       int r = 0;
>> +
>> +       if (msg->perm || !msg->size)
>> +               return -EINVAL;
>> +
>> +       map = vhost_iotlb_itree_first(iotlb, start, last);
>> +       if (!map)
>> +               return -ENOENT;
>> +
>> +       if (map->start != start || map->last != last)
>> +               return -EINVAL;
>> +
>> +       /* batch will finish with remap.  non-batch must do it now. */
>> +       if (!v->in_batch)
>> +               r = ops->set_map(vdpa, asid, iotlb);
> 
> I'm missing how these cases are handled:
>
> * The device does not expose set_map but dma_map / dma_unmap (you can
> check this case with the simulator)

I chose not to support dma_map, because set_map looks to be more commonly supported,
and batch-mode plus set_map is the most efficient way to support remapping.
I could define a dma_remap entry point if folks think that is important.
Regardless, I will check that ops->set_map != NULL before calling it.

> * The device uses platform iommu (!set_map && !dma_unmap vdpa_ops).

I believe this just works, because iommu_map() never saves userland address, so it 
does not need to be informed when uaddr changes.  That is certainly true for the 
code path:

  vhost_vdpa_pa_map()
    vhost_vdpa_map()
      if !dma_map && !set_map
        iommu_map()

However, this code path confuses me:

  vhost_vdpa_process_iotlb_update()
    if (vdpa->use_va)
      vhost_vdpa_va_map(... uaddr)
        vhost_vdpa_map(... uaddr)
          iommu_map(... uaddr)

AFAICT uaddr is never translated to physical.
Maybe use_va is always false if !dma_map && !set_map  ?

- Steve

>> +       if (!r)
>> +               map->addr = msg->uaddr;
>> +
>> +       return r;
>> +}
>> +
>>  static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
>>                                            struct vhost_iotlb *iotlb,
>>                                            struct vhost_iotlb_msg *msg)
>> @@ -1298,6 +1329,9 @@ static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
>>                         ops->set_map(vdpa, asid, iotlb);
>>                 v->in_batch = false;
>>                 break;
>> +       case VHOST_IOTLB_REMAP:
>> +               r = vhost_vdpa_process_iotlb_remap(v, iotlb, msg);
>> +               break;
>>         default:
>>                 r = -EINVAL;
>>                 break;
>> diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
>> index 9177843951e9..35908315ff55 100644
>> --- a/include/uapi/linux/vhost_types.h
>> +++ b/include/uapi/linux/vhost_types.h
>> @@ -79,7 +79,7 @@ struct vhost_iotlb_msg {
>>  /*
>>   * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying
>>   * multiple mappings in one go: beginning with
>> - * VHOST_IOTLB_BATCH_BEGIN, followed by any number of
>> + * VHOST_IOTLB_BATCH_BEGIN, followed by any number of VHOST_IOTLB_REMAP or
>>   * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END.
>>   * When one of these two values is used as the message type, the rest
>>   * of the fields in the message are ignored. There's no guarantee that
>> @@ -87,6 +87,15 @@ struct vhost_iotlb_msg {
>>   */
>>  #define VHOST_IOTLB_BATCH_BEGIN    5
>>  #define VHOST_IOTLB_BATCH_END      6
>> +
>> +/*
>> + * VHOST_IOTLB_REMAP registers a new uaddr for the existing mapping at iova.
>> + * The new uaddr must address the same memory object as originally mapped.
>> + * Failure to do so will result in user memory corruption and/or device
>> + * misbehavior.  iova and size must match the arguments used to create the
>> + * an existing mapping.  Protection is not changed, and perm must be 0.
>> + */
>> +#define VHOST_IOTLB_REMAP          7
>>         __u8 type;
>>  };
>>
>> --
>> 2.39.3
>>
>
  

Patch

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index faed6471934a..ec5ca20bd47d 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -1219,6 +1219,37 @@  static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
 
 }
 
+static int vhost_vdpa_process_iotlb_remap(struct vhost_vdpa *v,
+					  struct vhost_iotlb *iotlb,
+					  struct vhost_iotlb_msg *msg)
+{
+	struct vdpa_device *vdpa = v->vdpa;
+	const struct vdpa_config_ops *ops = vdpa->config;
+	u32 asid = iotlb_to_asid(iotlb);
+	u64 start = msg->iova;
+	u64 last = start + msg->size - 1;
+	struct vhost_iotlb_map *map;
+	int r = 0;
+
+	if (msg->perm || !msg->size)
+		return -EINVAL;
+
+	map = vhost_iotlb_itree_first(iotlb, start, last);
+	if (!map)
+		return -ENOENT;
+
+	if (map->start != start || map->last != last)
+		return -EINVAL;
+
+	/* batch will finish with remap.  non-batch must do it now. */
+	if (!v->in_batch)
+		r = ops->set_map(vdpa, asid, iotlb);
+	if (!r)
+		map->addr = msg->uaddr;
+
+	return r;
+}
+
 static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 					   struct vhost_iotlb *iotlb,
 					   struct vhost_iotlb_msg *msg)
@@ -1298,6 +1329,9 @@  static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, u32 asid,
 			ops->set_map(vdpa, asid, iotlb);
 		v->in_batch = false;
 		break;
+	case VHOST_IOTLB_REMAP:
+		r = vhost_vdpa_process_iotlb_remap(v, iotlb, msg);
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index 9177843951e9..35908315ff55 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -79,7 +79,7 @@  struct vhost_iotlb_msg {
 /*
  * VHOST_IOTLB_BATCH_BEGIN and VHOST_IOTLB_BATCH_END allow modifying
  * multiple mappings in one go: beginning with
- * VHOST_IOTLB_BATCH_BEGIN, followed by any number of
+ * VHOST_IOTLB_BATCH_BEGIN, followed by any number of VHOST_IOTLB_REMAP or
  * VHOST_IOTLB_UPDATE messages, and ending with VHOST_IOTLB_BATCH_END.
  * When one of these two values is used as the message type, the rest
  * of the fields in the message are ignored. There's no guarantee that
@@ -87,6 +87,15 @@  struct vhost_iotlb_msg {
  */
 #define VHOST_IOTLB_BATCH_BEGIN    5
 #define VHOST_IOTLB_BATCH_END      6
+
+/*
+ * VHOST_IOTLB_REMAP registers a new uaddr for the existing mapping at iova.
+ * The new uaddr must address the same memory object as originally mapped.
+ * Failure to do so will result in user memory corruption and/or device
+ * misbehavior.  iova and size must match the arguments used to create the
+ * an existing mapping.  Protection is not changed, and perm must be 0.
+ */
+#define VHOST_IOTLB_REMAP          7
 	__u8 type;
 };