[v2] vdpasim: support doorbell mapping

Message ID 20221205135130.2336-1-longpeng2@huawei.com
State New
Headers
Series [v2] vdpasim: support doorbell mapping |

Commit Message

Longpeng(Mike) Dec. 5, 2022, 1:51 p.m. UTC
  From: Longpeng <longpeng2@huawei.com>

Support doorbell mapping for vdpasim devices, then we can test the notify
passthrough feature even if there's no real hardware on hand.

Allocates a dummy page which is used to emulate the notify page of the device,
all VQs share the same notify register  that initiated to 0xffff. A  periodic
work will check whether there're requests need to process ( the value of the
notify register is 0xffff or not ).

This cap is disabled as default, users can enable it as follow:
  modprobe vdpa_sim notify_passthrough=true

Signed-off-by: Longpeng <longpeng2@huawei.com>
---
Changes v1->v2:
  - support both kick mode and passthrough mode. [Jason]
  - poll the notify register first. [Jason, Michael]
---
 drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 ++++++++++++++++++++++++++++++++
 drivers/vdpa/vdpa_sim/vdpa_sim.h |  3 ++
 2 files changed, 80 insertions(+)
  

Comments

Jason Wang Dec. 6, 2022, 6:59 a.m. UTC | #1
On Mon, Dec 5, 2022 at 9:52 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
>
> From: Longpeng <longpeng2@huawei.com>
>
> Support doorbell mapping for vdpasim devices, then we can test the notify
> passthrough feature even if there's no real hardware on hand.
>
> Allocates a dummy page which is used to emulate the notify page of the device,
> all VQs share the same notify register  that initiated to 0xffff. A  periodic
> work will check whether there're requests need to process ( the value of the
> notify register is 0xffff or not ).
>
> This cap is disabled as default, users can enable it as follow:
>   modprobe vdpa_sim notify_passthrough=true
>
> Signed-off-by: Longpeng <longpeng2@huawei.com>
> ---
> Changes v1->v2:
>   - support both kick mode and passthrough mode. [Jason]
>   - poll the notify register first. [Jason, Michael]
> ---
>  drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 ++++++++++++++++++++++++++++++++
>  drivers/vdpa/vdpa_sim/vdpa_sim.h |  3 ++
>  2 files changed, 80 insertions(+)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index 7438a89ce939..07096a04dabb 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -14,6 +14,7 @@
>  #include <linux/slab.h>
>  #include <linux/sched.h>
>  #include <linux/dma-map-ops.h>
> +#include <asm/set_memory.h>
>  #include <linux/vringh.h>
>  #include <linux/vdpa.h>
>  #include <linux/vhost_iotlb.h>
> @@ -36,9 +37,16 @@ module_param(max_iotlb_entries, int, 0444);
>  MODULE_PARM_DESC(max_iotlb_entries,
>                  "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
>
> +static bool notify_passthrough;
> +module_param(notify_passthrough, bool, 0444);
> +MODULE_PARM_DESC(notify_passthrough,
> +                "Enable vq notify(doorbell) area mapping. (default: false)");

I'm not sure if it's worth doing this, I think we can afford the cost
of periodic work (especially considering it's a simulator).

> +
>  #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
>  #define VDPASIM_QUEUE_MAX 256
>  #define VDPASIM_VENDOR_ID 0
> +#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */
> +#define VDPASIM_NOTIFY_DEFVAL 0xffff
>
>  static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
>  {
> @@ -246,6 +254,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
>  static const struct vdpa_config_ops vdpasim_config_ops;
>  static const struct vdpa_config_ops vdpasim_batch_config_ops;
>
> +static void vdpasim_notify_work(struct work_struct *work)
> +{
> +       struct vdpasim *vdpasim;
> +       u16 *val;
> +
> +       vdpasim = container_of(work, struct vdpasim, notify_work.work);
> +
> +       if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
> +               goto out;
> +
> +       if (!vdpasim->running)
> +               goto out;
> +
> +       val = (u16 *)vdpasim->notify;
> +       if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL)
> +               schedule_work(&vdpasim->work);
> +
> +out:
> +       schedule_delayed_work(&vdpasim->notify_work,
> +                             msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
> +}
> +
>  struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>                                const struct vdpa_dev_set_config *config)
>  {
> @@ -287,6 +317,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>         set_dma_ops(dev, &vdpasim_dma_ops);
>         vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
>
> +       if (notify_passthrough) {
> +               INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work);
> +
> +               vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO);
> +               if (!vdpasim->notify)
> +                       goto err_iommu;
> +#ifdef CONFIG_X86
> +               set_memory_uc(vdpasim->notify, 1);
> +#endif

I had the same question with version 1, any reason for having this
part uncacheable? It's a hint that we have bugs somewhere. Are we
missing ACCESS/ORDER_PLATFORM or other features?

> +               *(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL;

WRITE_ONCE()?

> +       }
> +
>         vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
>         if (!vdpasim->config)
>                 goto err_iommu;
> @@ -495,6 +537,18 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
>         return status;
>  }
>
> +static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start)
> +{
> +       if (!notify_passthrough)
> +               return;

Only two callers for this function: one is start another is stop. If
we decide to get rid of notify_passthrough, I'd rather opencode the
schedule/cancel_delayed().

Thanks

> +
> +       if (start)
> +               schedule_delayed_work(&vdpasim->notify_work,
> +                                     msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
> +       else
> +               cancel_delayed_work_sync(&vdpasim->notify_work);
> +}
> +
>  static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
>  {
>         struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> @@ -502,12 +556,14 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
>         spin_lock(&vdpasim->lock);
>         vdpasim->status = status;
>         spin_unlock(&vdpasim->lock);
> +       vdpasim_set_notify_work(vdpasim, status & VIRTIO_CONFIG_S_DRIVER_OK);
>  }
>
>  static int vdpasim_reset(struct vdpa_device *vdpa, bool clear)
>  {
>         struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>
> +       vdpasim_set_notify_work(vdpasim, false);
>         spin_lock(&vdpasim->lock);
>         vdpasim->status = 0;
>         vdpasim_do_reset(vdpasim);
> @@ -672,11 +728,24 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
>         return 0;
>  }
>
> +static struct vdpa_notification_area
> +vdpasim_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
> +{
> +       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> +       struct vdpa_notification_area notify;
> +
> +       notify.addr = virt_to_phys((void *)vdpasim->notify);
> +       notify.size = PAGE_SIZE;
> +
> +       return notify;
> +}
> +
>  static void vdpasim_free(struct vdpa_device *vdpa)
>  {
>         struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>         int i;
>
> +       vdpasim_set_notify_work(vdpasim, false);
>         cancel_work_sync(&vdpasim->work);
>
>         for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
> @@ -693,6 +762,12 @@ static void vdpasim_free(struct vdpa_device *vdpa)
>         vhost_iotlb_free(vdpasim->iommu);
>         kfree(vdpasim->vqs);
>         kfree(vdpasim->config);
> +       if (vdpasim->notify) {
> +#ifdef CONFIG_X86
> +               set_memory_wb(vdpasim->notify, 1);
> +#endif
> +               free_page(vdpasim->notify);
> +       }
>  }
>
>  static const struct vdpa_config_ops vdpasim_config_ops = {
> @@ -704,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
>         .get_vq_ready           = vdpasim_get_vq_ready,
>         .set_vq_state           = vdpasim_set_vq_state,
>         .get_vq_state           = vdpasim_get_vq_state,
> +       .get_vq_notification    = vdpasim_get_vq_notification,
>         .get_vq_align           = vdpasim_get_vq_align,
>         .get_vq_group           = vdpasim_get_vq_group,
>         .get_device_features    = vdpasim_get_device_features,
> @@ -737,6 +813,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
>         .get_vq_ready           = vdpasim_get_vq_ready,
>         .set_vq_state           = vdpasim_set_vq_state,
>         .get_vq_state           = vdpasim_get_vq_state,
> +       .get_vq_notification    = vdpasim_get_vq_notification,
>         .get_vq_align           = vdpasim_get_vq_align,
>         .get_vq_group           = vdpasim_get_vq_group,
>         .get_device_features    = vdpasim_get_device_features,
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> index 0e78737dcc16..0769ccbd3911 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> @@ -69,6 +69,9 @@ struct vdpasim {
>         bool running;
>         /* spinlock to synchronize iommu table */
>         spinlock_t iommu_lock;
> +       /* dummy notify page */
> +       unsigned long notify;
> +       struct delayed_work notify_work;
>  };
>
>  struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
> --
> 2.23.0
>
  
Longpeng(Mike) Dec. 6, 2022, 8:45 a.m. UTC | #2
在 2022/12/6 14:59, Jason Wang 写道:
> On Mon, Dec 5, 2022 at 9:52 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
>>
>> From: Longpeng <longpeng2@huawei.com>
>>
>> Support doorbell mapping for vdpasim devices, then we can test the notify
>> passthrough feature even if there's no real hardware on hand.
>>
>> Allocates a dummy page which is used to emulate the notify page of the device,
>> all VQs share the same notify register  that initiated to 0xffff. A  periodic
>> work will check whether there're requests need to process ( the value of the
>> notify register is 0xffff or not ).
>>
>> This cap is disabled as default, users can enable it as follow:
>>    modprobe vdpa_sim notify_passthrough=true
>>
>> Signed-off-by: Longpeng <longpeng2@huawei.com>
>> ---
>> Changes v1->v2:
>>    - support both kick mode and passthrough mode. [Jason]
>>    - poll the notify register first. [Jason, Michael]
>> ---
>>   drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 ++++++++++++++++++++++++++++++++
>>   drivers/vdpa/vdpa_sim/vdpa_sim.h |  3 ++
>>   2 files changed, 80 insertions(+)
>>
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> index 7438a89ce939..07096a04dabb 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> @@ -14,6 +14,7 @@
>>   #include <linux/slab.h>
>>   #include <linux/sched.h>
>>   #include <linux/dma-map-ops.h>
>> +#include <asm/set_memory.h>
>>   #include <linux/vringh.h>
>>   #include <linux/vdpa.h>
>>   #include <linux/vhost_iotlb.h>
>> @@ -36,9 +37,16 @@ module_param(max_iotlb_entries, int, 0444);
>>   MODULE_PARM_DESC(max_iotlb_entries,
>>                   "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
>>
>> +static bool notify_passthrough;
>> +module_param(notify_passthrough, bool, 0444);
>> +MODULE_PARM_DESC(notify_passthrough,
>> +                "Enable vq notify(doorbell) area mapping. (default: false)");
> 
> I'm not sure if it's worth doing this, I think we can afford the cost
> of periodic work (especially considering it's a simulator).
> 
We can support kick mode and passthgourh mode simultaneously now, it is 
a little unnecessary. I would remove it in next version.

>> +
>>   #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
>>   #define VDPASIM_QUEUE_MAX 256
>>   #define VDPASIM_VENDOR_ID 0
>> +#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */
>> +#define VDPASIM_NOTIFY_DEFVAL 0xffff
>>
>>   static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
>>   {
>> @@ -246,6 +254,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
>>   static const struct vdpa_config_ops vdpasim_config_ops;
>>   static const struct vdpa_config_ops vdpasim_batch_config_ops;
>>
>> +static void vdpasim_notify_work(struct work_struct *work)
>> +{
>> +       struct vdpasim *vdpasim;
>> +       u16 *val;
>> +
>> +       vdpasim = container_of(work, struct vdpasim, notify_work.work);
>> +
>> +       if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
>> +               goto out;
>> +
>> +       if (!vdpasim->running)
>> +               goto out;
>> +
>> +       val = (u16 *)vdpasim->notify;
>> +       if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL)
>> +               schedule_work(&vdpasim->work);
>> +
>> +out:
>> +       schedule_delayed_work(&vdpasim->notify_work,
>> +                             msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
>> +}
>> +
>>   struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>>                                 const struct vdpa_dev_set_config *config)
>>   {
>> @@ -287,6 +317,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>>          set_dma_ops(dev, &vdpasim_dma_ops);
>>          vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
>>
>> +       if (notify_passthrough) {
>> +               INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work);
>> +
>> +               vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO);
>> +               if (!vdpasim->notify)
>> +                       goto err_iommu;
>> +#ifdef CONFIG_X86
>> +               set_memory_uc(vdpasim->notify, 1);
>> +#endif
> 
> I had the same question with version 1, any reason for having this
> part uncacheable? It's a hint that we have bugs somewhere. Are we
> missing ACCESS/ORDER_PLATFORM or other features?
> 
Oh, I had replied you in that thread.

The vm_page_prot of notify mapping VMA is pgprot_noncached (see 
vhost_vdpa_fault) but the vdpasim->notify is WB, so we should set its 
memtype to UC here and set it back to WB when releasing the device.

Otherwise, the warning in trace_pfn_remap() would be triggered.
For example:
x86/PAT: CPU 16/KVM:17819 map pfn RAM range req uncached-minus for [mem
0x5151f3000-0x5151f3fff], got write-back

>> +               *(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL;
> 
> WRITE_ONCE()?
> 
Okay.

>> +       }
>> +
>>          vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
>>          if (!vdpasim->config)
>>                  goto err_iommu;
>> @@ -495,6 +537,18 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
>>          return status;
>>   }
>>
>> +static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start)
>> +{
>> +       if (!notify_passthrough)
>> +               return;
> 
> Only two callers for this function: one is start another is stop. If
> we decide to get rid of notify_passthrough, I'd rather opencode the
> schedule/cancel_delayed().
> 
Okay, thanks.

> Thanks
> 
>> +
>> +       if (start)
>> +               schedule_delayed_work(&vdpasim->notify_work,
>> +                                     msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
>> +       else
>> +               cancel_delayed_work_sync(&vdpasim->notify_work);
>> +}
>> +
>>   static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
>>   {
>>          struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>> @@ -502,12 +556,14 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
>>          spin_lock(&vdpasim->lock);
>>          vdpasim->status = status;
>>          spin_unlock(&vdpasim->lock);
>> +       vdpasim_set_notify_work(vdpasim, status & VIRTIO_CONFIG_S_DRIVER_OK);
>>   }
>>
>>   static int vdpasim_reset(struct vdpa_device *vdpa, bool clear)
>>   {
>>          struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>
>> +       vdpasim_set_notify_work(vdpasim, false);
>>          spin_lock(&vdpasim->lock);
>>          vdpasim->status = 0;
>>          vdpasim_do_reset(vdpasim);
>> @@ -672,11 +728,24 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
>>          return 0;
>>   }
>>
>> +static struct vdpa_notification_area
>> +vdpasim_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
>> +{
>> +       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>> +       struct vdpa_notification_area notify;
>> +
>> +       notify.addr = virt_to_phys((void *)vdpasim->notify);
>> +       notify.size = PAGE_SIZE;
>> +
>> +       return notify;
>> +}
>> +
>>   static void vdpasim_free(struct vdpa_device *vdpa)
>>   {
>>          struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>          int i;
>>
>> +       vdpasim_set_notify_work(vdpasim, false);
>>          cancel_work_sync(&vdpasim->work);
>>
>>          for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
>> @@ -693,6 +762,12 @@ static void vdpasim_free(struct vdpa_device *vdpa)
>>          vhost_iotlb_free(vdpasim->iommu);
>>          kfree(vdpasim->vqs);
>>          kfree(vdpasim->config);
>> +       if (vdpasim->notify) {
>> +#ifdef CONFIG_X86
>> +               set_memory_wb(vdpasim->notify, 1);
>> +#endif
>> +               free_page(vdpasim->notify);
>> +       }
>>   }
>>
>>   static const struct vdpa_config_ops vdpasim_config_ops = {
>> @@ -704,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
>>          .get_vq_ready           = vdpasim_get_vq_ready,
>>          .set_vq_state           = vdpasim_set_vq_state,
>>          .get_vq_state           = vdpasim_get_vq_state,
>> +       .get_vq_notification    = vdpasim_get_vq_notification,
>>          .get_vq_align           = vdpasim_get_vq_align,
>>          .get_vq_group           = vdpasim_get_vq_group,
>>          .get_device_features    = vdpasim_get_device_features,
>> @@ -737,6 +813,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
>>          .get_vq_ready           = vdpasim_get_vq_ready,
>>          .set_vq_state           = vdpasim_set_vq_state,
>>          .get_vq_state           = vdpasim_get_vq_state,
>> +       .get_vq_notification    = vdpasim_get_vq_notification,
>>          .get_vq_align           = vdpasim_get_vq_align,
>>          .get_vq_group           = vdpasim_get_vq_group,
>>          .get_device_features    = vdpasim_get_device_features,
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> index 0e78737dcc16..0769ccbd3911 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> @@ -69,6 +69,9 @@ struct vdpasim {
>>          bool running;
>>          /* spinlock to synchronize iommu table */
>>          spinlock_t iommu_lock;
>> +       /* dummy notify page */
>> +       unsigned long notify;
>> +       struct delayed_work notify_work;
>>   };
>>
>>   struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
>> --
>> 2.23.0
>>
> 
> .
  
Jason Wang Dec. 8, 2022, 7:37 a.m. UTC | #3
On Tue, Dec 6, 2022 at 4:46 PM Longpeng (Mike, Cloud Infrastructure
Service Product Dept.) <longpeng2@huawei.com> wrote:
>
>
>
> 在 2022/12/6 14:59, Jason Wang 写道:
> > On Mon, Dec 5, 2022 at 9:52 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
> >>
> >> From: Longpeng <longpeng2@huawei.com>
> >>
> >> Support doorbell mapping for vdpasim devices, then we can test the notify
> >> passthrough feature even if there's no real hardware on hand.
> >>
> >> Allocates a dummy page which is used to emulate the notify page of the device,
> >> all VQs share the same notify register  that initiated to 0xffff. A  periodic
> >> work will check whether there're requests need to process ( the value of the
> >> notify register is 0xffff or not ).
> >>
> >> This cap is disabled as default, users can enable it as follow:
> >>    modprobe vdpa_sim notify_passthrough=true
> >>
> >> Signed-off-by: Longpeng <longpeng2@huawei.com>
> >> ---
> >> Changes v1->v2:
> >>    - support both kick mode and passthrough mode. [Jason]
> >>    - poll the notify register first. [Jason, Michael]
> >> ---
> >>   drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 ++++++++++++++++++++++++++++++++
> >>   drivers/vdpa/vdpa_sim/vdpa_sim.h |  3 ++
> >>   2 files changed, 80 insertions(+)
> >>
> >> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >> index 7438a89ce939..07096a04dabb 100644
> >> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >> @@ -14,6 +14,7 @@
> >>   #include <linux/slab.h>
> >>   #include <linux/sched.h>
> >>   #include <linux/dma-map-ops.h>
> >> +#include <asm/set_memory.h>
> >>   #include <linux/vringh.h>
> >>   #include <linux/vdpa.h>
> >>   #include <linux/vhost_iotlb.h>
> >> @@ -36,9 +37,16 @@ module_param(max_iotlb_entries, int, 0444);
> >>   MODULE_PARM_DESC(max_iotlb_entries,
> >>                   "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
> >>
> >> +static bool notify_passthrough;
> >> +module_param(notify_passthrough, bool, 0444);
> >> +MODULE_PARM_DESC(notify_passthrough,
> >> +                "Enable vq notify(doorbell) area mapping. (default: false)");
> >
> > I'm not sure if it's worth doing this, I think we can afford the cost
> > of periodic work (especially considering it's a simulator).
> >
> We can support kick mode and passthgourh mode simultaneously now, it is
> a little unnecessary. I would remove it in next version.
>
> >> +
> >>   #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
> >>   #define VDPASIM_QUEUE_MAX 256
> >>   #define VDPASIM_VENDOR_ID 0
> >> +#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */
> >> +#define VDPASIM_NOTIFY_DEFVAL 0xffff
> >>
> >>   static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
> >>   {
> >> @@ -246,6 +254,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
> >>   static const struct vdpa_config_ops vdpasim_config_ops;
> >>   static const struct vdpa_config_ops vdpasim_batch_config_ops;
> >>
> >> +static void vdpasim_notify_work(struct work_struct *work)
> >> +{
> >> +       struct vdpasim *vdpasim;
> >> +       u16 *val;
> >> +
> >> +       vdpasim = container_of(work, struct vdpasim, notify_work.work);
> >> +
> >> +       if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
> >> +               goto out;
> >> +
> >> +       if (!vdpasim->running)
> >> +               goto out;
> >> +
> >> +       val = (u16 *)vdpasim->notify;
> >> +       if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL)
> >> +               schedule_work(&vdpasim->work);
> >> +
> >> +out:
> >> +       schedule_delayed_work(&vdpasim->notify_work,
> >> +                             msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
> >> +}
> >> +
> >>   struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
> >>                                 const struct vdpa_dev_set_config *config)
> >>   {
> >> @@ -287,6 +317,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
> >>          set_dma_ops(dev, &vdpasim_dma_ops);
> >>          vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
> >>
> >> +       if (notify_passthrough) {
> >> +               INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work);
> >> +
> >> +               vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO);
> >> +               if (!vdpasim->notify)
> >> +                       goto err_iommu;
> >> +#ifdef CONFIG_X86
> >> +               set_memory_uc(vdpasim->notify, 1);
> >> +#endif
> >
> > I had the same question with version 1, any reason for having this
> > part uncacheable? It's a hint that we have bugs somewhere. Are we
> > missing ACCESS/ORDER_PLATFORM or other features?
> >
> Oh, I had replied you in that thread.
>
> The vm_page_prot of notify mapping VMA is pgprot_noncached (see
> vhost_vdpa_fault) but the vdpasim->notify is WB, so we should set its
> memtype to UC here and set it back to WB when releasing the device.

Ok, but any reason to have a X86 guard for this? This basically makes
it won't work for non-x86 archs?

I think the correct way is to introduce a new config_ops to fetch the
correct pgprot. Then we are fine.

Thanks

>
> Otherwise, the warning in trace_pfn_remap() would be triggered.
> For example:
> x86/PAT: CPU 16/KVM:17819 map pfn RAM range req uncached-minus for [mem
> 0x5151f3000-0x5151f3fff], got write-back
>
> >> +               *(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL;
> >
> > WRITE_ONCE()?
> >
> Okay.
>
> >> +       }
> >> +
> >>          vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
> >>          if (!vdpasim->config)
> >>                  goto err_iommu;
> >> @@ -495,6 +537,18 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
> >>          return status;
> >>   }
> >>
> >> +static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start)
> >> +{
> >> +       if (!notify_passthrough)
> >> +               return;
> >
> > Only two callers for this function: one is start another is stop. If
> > we decide to get rid of notify_passthrough, I'd rather opencode the
> > schedule/cancel_delayed().
> >
> Okay, thanks.
>
> > Thanks
> >
> >> +
> >> +       if (start)
> >> +               schedule_delayed_work(&vdpasim->notify_work,
> >> +                                     msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
> >> +       else
> >> +               cancel_delayed_work_sync(&vdpasim->notify_work);
> >> +}
> >> +
> >>   static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
> >>   {
> >>          struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> >> @@ -502,12 +556,14 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
> >>          spin_lock(&vdpasim->lock);
> >>          vdpasim->status = status;
> >>          spin_unlock(&vdpasim->lock);
> >> +       vdpasim_set_notify_work(vdpasim, status & VIRTIO_CONFIG_S_DRIVER_OK);
> >>   }
> >>
> >>   static int vdpasim_reset(struct vdpa_device *vdpa, bool clear)
> >>   {
> >>          struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> >>
> >> +       vdpasim_set_notify_work(vdpasim, false);
> >>          spin_lock(&vdpasim->lock);
> >>          vdpasim->status = 0;
> >>          vdpasim_do_reset(vdpasim);
> >> @@ -672,11 +728,24 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
> >>          return 0;
> >>   }
> >>
> >> +static struct vdpa_notification_area
> >> +vdpasim_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
> >> +{
> >> +       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> >> +       struct vdpa_notification_area notify;
> >> +
> >> +       notify.addr = virt_to_phys((void *)vdpasim->notify);
> >> +       notify.size = PAGE_SIZE;
> >> +
> >> +       return notify;
> >> +}
> >> +
> >>   static void vdpasim_free(struct vdpa_device *vdpa)
> >>   {
> >>          struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> >>          int i;
> >>
> >> +       vdpasim_set_notify_work(vdpasim, false);
> >>          cancel_work_sync(&vdpasim->work);
> >>
> >>          for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
> >> @@ -693,6 +762,12 @@ static void vdpasim_free(struct vdpa_device *vdpa)
> >>          vhost_iotlb_free(vdpasim->iommu);
> >>          kfree(vdpasim->vqs);
> >>          kfree(vdpasim->config);
> >> +       if (vdpasim->notify) {
> >> +#ifdef CONFIG_X86
> >> +               set_memory_wb(vdpasim->notify, 1);
> >> +#endif
> >> +               free_page(vdpasim->notify);
> >> +       }
> >>   }
> >>
> >>   static const struct vdpa_config_ops vdpasim_config_ops = {
> >> @@ -704,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
> >>          .get_vq_ready           = vdpasim_get_vq_ready,
> >>          .set_vq_state           = vdpasim_set_vq_state,
> >>          .get_vq_state           = vdpasim_get_vq_state,
> >> +       .get_vq_notification    = vdpasim_get_vq_notification,
> >>          .get_vq_align           = vdpasim_get_vq_align,
> >>          .get_vq_group           = vdpasim_get_vq_group,
> >>          .get_device_features    = vdpasim_get_device_features,
> >> @@ -737,6 +813,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
> >>          .get_vq_ready           = vdpasim_get_vq_ready,
> >>          .set_vq_state           = vdpasim_set_vq_state,
> >>          .get_vq_state           = vdpasim_get_vq_state,
> >> +       .get_vq_notification    = vdpasim_get_vq_notification,
> >>          .get_vq_align           = vdpasim_get_vq_align,
> >>          .get_vq_group           = vdpasim_get_vq_group,
> >>          .get_device_features    = vdpasim_get_device_features,
> >> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> >> index 0e78737dcc16..0769ccbd3911 100644
> >> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
> >> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> >> @@ -69,6 +69,9 @@ struct vdpasim {
> >>          bool running;
> >>          /* spinlock to synchronize iommu table */
> >>          spinlock_t iommu_lock;
> >> +       /* dummy notify page */
> >> +       unsigned long notify;
> >> +       struct delayed_work notify_work;
> >>   };
> >>
> >>   struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
> >> --
> >> 2.23.0
> >>
> >
> > .
>
  
Longpeng(Mike) Dec. 12, 2022, 12:51 a.m. UTC | #4
在 2022/12/8 15:37, Jason Wang 写道:
> On Tue, Dec 6, 2022 at 4:46 PM Longpeng (Mike, Cloud Infrastructure
> Service Product Dept.) <longpeng2@huawei.com> wrote:
>>
>>
>>
>> 在 2022/12/6 14:59, Jason Wang 写道:
>>> On Mon, Dec 5, 2022 at 9:52 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
>>>>
>>>> From: Longpeng <longpeng2@huawei.com>
>>>>
>>>> Support doorbell mapping for vdpasim devices, then we can test the notify
>>>> passthrough feature even if there's no real hardware on hand.
>>>>
>>>> Allocates a dummy page which is used to emulate the notify page of the device,
>>>> all VQs share the same notify register  that initiated to 0xffff. A  periodic
>>>> work will check whether there're requests need to process ( the value of the
>>>> notify register is 0xffff or not ).
>>>>
>>>> This cap is disabled as default, users can enable it as follow:
>>>>     modprobe vdpa_sim notify_passthrough=true
>>>>
>>>> Signed-off-by: Longpeng <longpeng2@huawei.com>
>>>> ---
>>>> Changes v1->v2:
>>>>     - support both kick mode and passthrough mode. [Jason]
>>>>     - poll the notify register first. [Jason, Michael]
>>>> ---
>>>>    drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 ++++++++++++++++++++++++++++++++
>>>>    drivers/vdpa/vdpa_sim/vdpa_sim.h |  3 ++
>>>>    2 files changed, 80 insertions(+)
>>>>
>>>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>>> index 7438a89ce939..07096a04dabb 100644
>>>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>>> @@ -14,6 +14,7 @@
>>>>    #include <linux/slab.h>
>>>>    #include <linux/sched.h>
>>>>    #include <linux/dma-map-ops.h>
>>>> +#include <asm/set_memory.h>
>>>>    #include <linux/vringh.h>
>>>>    #include <linux/vdpa.h>
>>>>    #include <linux/vhost_iotlb.h>
>>>> @@ -36,9 +37,16 @@ module_param(max_iotlb_entries, int, 0444);
>>>>    MODULE_PARM_DESC(max_iotlb_entries,
>>>>                    "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
>>>>
>>>> +static bool notify_passthrough;
>>>> +module_param(notify_passthrough, bool, 0444);
>>>> +MODULE_PARM_DESC(notify_passthrough,
>>>> +                "Enable vq notify(doorbell) area mapping. (default: false)");
>>>
>>> I'm not sure if it's worth doing this, I think we can afford the cost
>>> of periodic work (especially considering it's a simulator).
>>>
>> We can support kick mode and passthgourh mode simultaneously now, it is
>> a little unnecessary. I would remove it in next version.
>>
>>>> +
>>>>    #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
>>>>    #define VDPASIM_QUEUE_MAX 256
>>>>    #define VDPASIM_VENDOR_ID 0
>>>> +#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */
>>>> +#define VDPASIM_NOTIFY_DEFVAL 0xffff
>>>>
>>>>    static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
>>>>    {
>>>> @@ -246,6 +254,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
>>>>    static const struct vdpa_config_ops vdpasim_config_ops;
>>>>    static const struct vdpa_config_ops vdpasim_batch_config_ops;
>>>>
>>>> +static void vdpasim_notify_work(struct work_struct *work)
>>>> +{
>>>> +       struct vdpasim *vdpasim;
>>>> +       u16 *val;
>>>> +
>>>> +       vdpasim = container_of(work, struct vdpasim, notify_work.work);
>>>> +
>>>> +       if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
>>>> +               goto out;
>>>> +
>>>> +       if (!vdpasim->running)
>>>> +               goto out;
>>>> +
>>>> +       val = (u16 *)vdpasim->notify;
>>>> +       if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL)
>>>> +               schedule_work(&vdpasim->work);
>>>> +
>>>> +out:
>>>> +       schedule_delayed_work(&vdpasim->notify_work,
>>>> +                             msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
>>>> +}
>>>> +
>>>>    struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>>>>                                  const struct vdpa_dev_set_config *config)
>>>>    {
>>>> @@ -287,6 +317,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>>>>           set_dma_ops(dev, &vdpasim_dma_ops);
>>>>           vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
>>>>
>>>> +       if (notify_passthrough) {
>>>> +               INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work);
>>>> +
>>>> +               vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO);
>>>> +               if (!vdpasim->notify)
>>>> +                       goto err_iommu;
>>>> +#ifdef CONFIG_X86
>>>> +               set_memory_uc(vdpasim->notify, 1);
>>>> +#endif
>>>
>>> I had the same question with version 1, any reason for having this
>>> part uncacheable? It's a hint that we have bugs somewhere. Are we
>>> missing ACCESS/ORDER_PLATFORM or other features?
>>>
>> Oh, I had replied you in that thread.
>>
>> The vm_page_prot of notify mapping VMA is pgprot_noncached (see
>> vhost_vdpa_fault) but the vdpasim->notify is WB, so we should set its
>> memtype to UC here and set it back to WB when releasing the device.
> 
> Ok, but any reason to have a X86 guard for this? This basically makes
> it won't work for non-x86 archs?
> 
Because only X86 defines __HAVE_PFNMAP_TRACKING, track_pfn_remap does 
nothing on other archs.

> I think the correct way is to introduce a new config_ops to fetch the
> correct pgprot. Then we are fine.
> 
Is it worth doing this? It seems the vdpasim device is just a special 
case (using RAM to emulates the notify area) and other real HW devices 
work fine with the current implementation?

> Thanks
> 
>>
>> Otherwise, the warning in trace_pfn_remap() would be triggered.
>> For example:
>> x86/PAT: CPU 16/KVM:17819 map pfn RAM range req uncached-minus for [mem
>> 0x5151f3000-0x5151f3fff], got write-back
>>
>>>> +               *(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL;
>>>
>>> WRITE_ONCE()?
>>>
>> Okay.
>>
>>>> +       }
>>>> +
>>>>           vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
>>>>           if (!vdpasim->config)
>>>>                   goto err_iommu;
>>>> @@ -495,6 +537,18 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
>>>>           return status;
>>>>    }
>>>>
>>>> +static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start)
>>>> +{
>>>> +       if (!notify_passthrough)
>>>> +               return;
>>>
>>> Only two callers for this function: one is start another is stop. If
>>> we decide to get rid of notify_passthrough, I'd rather opencode the
>>> schedule/cancel_delayed().
>>>
>> Okay, thanks.
>>
>>> Thanks
>>>
>>>> +
>>>> +       if (start)
>>>> +               schedule_delayed_work(&vdpasim->notify_work,
>>>> +                                     msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
>>>> +       else
>>>> +               cancel_delayed_work_sync(&vdpasim->notify_work);
>>>> +}
>>>> +
>>>>    static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
>>>>    {
>>>>           struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>>> @@ -502,12 +556,14 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
>>>>           spin_lock(&vdpasim->lock);
>>>>           vdpasim->status = status;
>>>>           spin_unlock(&vdpasim->lock);
>>>> +       vdpasim_set_notify_work(vdpasim, status & VIRTIO_CONFIG_S_DRIVER_OK);
>>>>    }
>>>>
>>>>    static int vdpasim_reset(struct vdpa_device *vdpa, bool clear)
>>>>    {
>>>>           struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>>>
>>>> +       vdpasim_set_notify_work(vdpasim, false);
>>>>           spin_lock(&vdpasim->lock);
>>>>           vdpasim->status = 0;
>>>>           vdpasim_do_reset(vdpasim);
>>>> @@ -672,11 +728,24 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
>>>>           return 0;
>>>>    }
>>>>
>>>> +static struct vdpa_notification_area
>>>> +vdpasim_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
>>>> +{
>>>> +       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>>> +       struct vdpa_notification_area notify;
>>>> +
>>>> +       notify.addr = virt_to_phys((void *)vdpasim->notify);
>>>> +       notify.size = PAGE_SIZE;
>>>> +
>>>> +       return notify;
>>>> +}
>>>> +
>>>>    static void vdpasim_free(struct vdpa_device *vdpa)
>>>>    {
>>>>           struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>>>           int i;
>>>>
>>>> +       vdpasim_set_notify_work(vdpasim, false);
>>>>           cancel_work_sync(&vdpasim->work);
>>>>
>>>>           for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
>>>> @@ -693,6 +762,12 @@ static void vdpasim_free(struct vdpa_device *vdpa)
>>>>           vhost_iotlb_free(vdpasim->iommu);
>>>>           kfree(vdpasim->vqs);
>>>>           kfree(vdpasim->config);
>>>> +       if (vdpasim->notify) {
>>>> +#ifdef CONFIG_X86
>>>> +               set_memory_wb(vdpasim->notify, 1);
>>>> +#endif
>>>> +               free_page(vdpasim->notify);
>>>> +       }
>>>>    }
>>>>
>>>>    static const struct vdpa_config_ops vdpasim_config_ops = {
>>>> @@ -704,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
>>>>           .get_vq_ready           = vdpasim_get_vq_ready,
>>>>           .set_vq_state           = vdpasim_set_vq_state,
>>>>           .get_vq_state           = vdpasim_get_vq_state,
>>>> +       .get_vq_notification    = vdpasim_get_vq_notification,
>>>>           .get_vq_align           = vdpasim_get_vq_align,
>>>>           .get_vq_group           = vdpasim_get_vq_group,
>>>>           .get_device_features    = vdpasim_get_device_features,
>>>> @@ -737,6 +813,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
>>>>           .get_vq_ready           = vdpasim_get_vq_ready,
>>>>           .set_vq_state           = vdpasim_set_vq_state,
>>>>           .get_vq_state           = vdpasim_get_vq_state,
>>>> +       .get_vq_notification    = vdpasim_get_vq_notification,
>>>>           .get_vq_align           = vdpasim_get_vq_align,
>>>>           .get_vq_group           = vdpasim_get_vq_group,
>>>>           .get_device_features    = vdpasim_get_device_features,
>>>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>>>> index 0e78737dcc16..0769ccbd3911 100644
>>>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
>>>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>>>> @@ -69,6 +69,9 @@ struct vdpasim {
>>>>           bool running;
>>>>           /* spinlock to synchronize iommu table */
>>>>           spinlock_t iommu_lock;
>>>> +       /* dummy notify page */
>>>> +       unsigned long notify;
>>>> +       struct delayed_work notify_work;
>>>>    };
>>>>
>>>>    struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
>>>> --
>>>> 2.23.0
>>>>
>>>
>>> .
>>
> 
> .
  
Jason Wang Dec. 12, 2022, 3:40 a.m. UTC | #5
On Mon, Dec 12, 2022 at 8:51 AM Longpeng (Mike, Cloud Infrastructure
Service Product Dept.) <longpeng2@huawei.com> wrote:
>
>
>
> 在 2022/12/8 15:37, Jason Wang 写道:
> > On Tue, Dec 6, 2022 at 4:46 PM Longpeng (Mike, Cloud Infrastructure
> > Service Product Dept.) <longpeng2@huawei.com> wrote:
> >>
> >>
> >>
> >> 在 2022/12/6 14:59, Jason Wang 写道:
> >>> On Mon, Dec 5, 2022 at 9:52 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
> >>>>
> >>>> From: Longpeng <longpeng2@huawei.com>
> >>>>
> >>>> Support doorbell mapping for vdpasim devices, then we can test the notify
> >>>> passthrough feature even if there's no real hardware on hand.
> >>>>
> >>>> Allocates a dummy page which is used to emulate the notify page of the device,
> >>>> all VQs share the same notify register  that initiated to 0xffff. A  periodic
> >>>> work will check whether there're requests need to process ( the value of the
> >>>> notify register is 0xffff or not ).
> >>>>
> >>>> This cap is disabled as default, users can enable it as follow:
> >>>>     modprobe vdpa_sim notify_passthrough=true
> >>>>
> >>>> Signed-off-by: Longpeng <longpeng2@huawei.com>
> >>>> ---
> >>>> Changes v1->v2:
> >>>>     - support both kick mode and passthrough mode. [Jason]
> >>>>     - poll the notify register first. [Jason, Michael]
> >>>> ---
> >>>>    drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 ++++++++++++++++++++++++++++++++
> >>>>    drivers/vdpa/vdpa_sim/vdpa_sim.h |  3 ++
> >>>>    2 files changed, 80 insertions(+)
> >>>>
> >>>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >>>> index 7438a89ce939..07096a04dabb 100644
> >>>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >>>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> >>>> @@ -14,6 +14,7 @@
> >>>>    #include <linux/slab.h>
> >>>>    #include <linux/sched.h>
> >>>>    #include <linux/dma-map-ops.h>
> >>>> +#include <asm/set_memory.h>
> >>>>    #include <linux/vringh.h>
> >>>>    #include <linux/vdpa.h>
> >>>>    #include <linux/vhost_iotlb.h>
> >>>> @@ -36,9 +37,16 @@ module_param(max_iotlb_entries, int, 0444);
> >>>>    MODULE_PARM_DESC(max_iotlb_entries,
> >>>>                    "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
> >>>>
> >>>> +static bool notify_passthrough;
> >>>> +module_param(notify_passthrough, bool, 0444);
> >>>> +MODULE_PARM_DESC(notify_passthrough,
> >>>> +                "Enable vq notify(doorbell) area mapping. (default: false)");
> >>>
> >>> I'm not sure if it's worth doing this, I think we can afford the cost
> >>> of periodic work (especially considering it's a simulator).
> >>>
> >> We can support kick mode and passthgourh mode simultaneously now, it is
> >> a little unnecessary. I would remove it in next version.
> >>
> >>>> +
> >>>>    #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
> >>>>    #define VDPASIM_QUEUE_MAX 256
> >>>>    #define VDPASIM_VENDOR_ID 0
> >>>> +#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */
> >>>> +#define VDPASIM_NOTIFY_DEFVAL 0xffff
> >>>>
> >>>>    static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
> >>>>    {
> >>>> @@ -246,6 +254,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
> >>>>    static const struct vdpa_config_ops vdpasim_config_ops;
> >>>>    static const struct vdpa_config_ops vdpasim_batch_config_ops;
> >>>>
> >>>> +static void vdpasim_notify_work(struct work_struct *work)
> >>>> +{
> >>>> +       struct vdpasim *vdpasim;
> >>>> +       u16 *val;
> >>>> +
> >>>> +       vdpasim = container_of(work, struct vdpasim, notify_work.work);
> >>>> +
> >>>> +       if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
> >>>> +               goto out;
> >>>> +
> >>>> +       if (!vdpasim->running)
> >>>> +               goto out;
> >>>> +
> >>>> +       val = (u16 *)vdpasim->notify;
> >>>> +       if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL)
> >>>> +               schedule_work(&vdpasim->work);
> >>>> +
> >>>> +out:
> >>>> +       schedule_delayed_work(&vdpasim->notify_work,
> >>>> +                             msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
> >>>> +}
> >>>> +
> >>>>    struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
> >>>>                                  const struct vdpa_dev_set_config *config)
> >>>>    {
> >>>> @@ -287,6 +317,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
> >>>>           set_dma_ops(dev, &vdpasim_dma_ops);
> >>>>           vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
> >>>>
> >>>> +       if (notify_passthrough) {
> >>>> +               INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work);
> >>>> +
> >>>> +               vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO);
> >>>> +               if (!vdpasim->notify)
> >>>> +                       goto err_iommu;
> >>>> +#ifdef CONFIG_X86
> >>>> +               set_memory_uc(vdpasim->notify, 1);
> >>>> +#endif
> >>>
> >>> I had the same question with version 1, any reason for having this
> >>> part uncacheable? It's a hint that we have bugs somewhere. Are we
> >>> missing ACCESS/ORDER_PLATFORM or other features?
> >>>
> >> Oh, I had replied you in that thread.
> >>
> >> The vm_page_prot of notify mapping VMA is pgprot_noncached (see
> >> vhost_vdpa_fault) but the vdpasim->notify is WB, so we should set its
> >> memtype to UC here and set it back to WB when releasing the device.
> >
> > Ok, but any reason to have a X86 guard for this? This basically makes
> > it won't work for non-x86 archs?
> >
> Because only X86 defines __HAVE_PFNMAP_TRACKING, track_pfn_remap does
> nothing on other archs.

Ok, but this seems fragile, as we don't have a guarantee that it won't
be implemented in the future.

>
> > I think the correct way is to introduce a new config_ops to fetch the
> > correct pgprot. Then we are fine.
> >
> Is it worth doing this?

I think it's worth since it:

1) only require small tweak on the vdpa core
2) avoid arch specific code in the simulator

> It seems the vdpasim device is just a special
> case (using RAM to emulates the notify area) and other real HW devices
> work fine with the current implementation?

There's no guarantee that we won't have any other software based vDPA
devices. We've already had a simulator and VDUSE. And we should not
exclude the possibility of another one.

Thanks

>
> > Thanks
> >
> >>
> >> Otherwise, the warning in trace_pfn_remap() would be triggered.
> >> For example:
> >> x86/PAT: CPU 16/KVM:17819 map pfn RAM range req uncached-minus for [mem
> >> 0x5151f3000-0x5151f3fff], got write-back
> >>
> >>>> +               *(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL;
> >>>
> >>> WRITE_ONCE()?
> >>>
> >> Okay.
> >>
> >>>> +       }
> >>>> +
> >>>>           vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
> >>>>           if (!vdpasim->config)
> >>>>                   goto err_iommu;
> >>>> @@ -495,6 +537,18 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
> >>>>           return status;
> >>>>    }
> >>>>
> >>>> +static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start)
> >>>> +{
> >>>> +       if (!notify_passthrough)
> >>>> +               return;
> >>>
> >>> Only two callers for this function: one is start another is stop. If
> >>> we decide to get rid of notify_passthrough, I'd rather opencode the
> >>> schedule/cancel_delayed().
> >>>
> >> Okay, thanks.
> >>
> >>> Thanks
> >>>
> >>>> +
> >>>> +       if (start)
> >>>> +               schedule_delayed_work(&vdpasim->notify_work,
> >>>> +                                     msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
> >>>> +       else
> >>>> +               cancel_delayed_work_sync(&vdpasim->notify_work);
> >>>> +}
> >>>> +
> >>>>    static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
> >>>>    {
> >>>>           struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> >>>> @@ -502,12 +556,14 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
> >>>>           spin_lock(&vdpasim->lock);
> >>>>           vdpasim->status = status;
> >>>>           spin_unlock(&vdpasim->lock);
> >>>> +       vdpasim_set_notify_work(vdpasim, status & VIRTIO_CONFIG_S_DRIVER_OK);
> >>>>    }
> >>>>
> >>>>    static int vdpasim_reset(struct vdpa_device *vdpa, bool clear)
> >>>>    {
> >>>>           struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> >>>>
> >>>> +       vdpasim_set_notify_work(vdpasim, false);
> >>>>           spin_lock(&vdpasim->lock);
> >>>>           vdpasim->status = 0;
> >>>>           vdpasim_do_reset(vdpasim);
> >>>> @@ -672,11 +728,24 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
> >>>>           return 0;
> >>>>    }
> >>>>
> >>>> +static struct vdpa_notification_area
> >>>> +vdpasim_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
> >>>> +{
> >>>> +       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> >>>> +       struct vdpa_notification_area notify;
> >>>> +
> >>>> +       notify.addr = virt_to_phys((void *)vdpasim->notify);
> >>>> +       notify.size = PAGE_SIZE;
> >>>> +
> >>>> +       return notify;
> >>>> +}
> >>>> +
> >>>>    static void vdpasim_free(struct vdpa_device *vdpa)
> >>>>    {
> >>>>           struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> >>>>           int i;
> >>>>
> >>>> +       vdpasim_set_notify_work(vdpasim, false);
> >>>>           cancel_work_sync(&vdpasim->work);
> >>>>
> >>>>           for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
> >>>> @@ -693,6 +762,12 @@ static void vdpasim_free(struct vdpa_device *vdpa)
> >>>>           vhost_iotlb_free(vdpasim->iommu);
> >>>>           kfree(vdpasim->vqs);
> >>>>           kfree(vdpasim->config);
> >>>> +       if (vdpasim->notify) {
> >>>> +#ifdef CONFIG_X86
> >>>> +               set_memory_wb(vdpasim->notify, 1);
> >>>> +#endif
> >>>> +               free_page(vdpasim->notify);
> >>>> +       }
> >>>>    }
> >>>>
> >>>>    static const struct vdpa_config_ops vdpasim_config_ops = {
> >>>> @@ -704,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
> >>>>           .get_vq_ready           = vdpasim_get_vq_ready,
> >>>>           .set_vq_state           = vdpasim_set_vq_state,
> >>>>           .get_vq_state           = vdpasim_get_vq_state,
> >>>> +       .get_vq_notification    = vdpasim_get_vq_notification,
> >>>>           .get_vq_align           = vdpasim_get_vq_align,
> >>>>           .get_vq_group           = vdpasim_get_vq_group,
> >>>>           .get_device_features    = vdpasim_get_device_features,
> >>>> @@ -737,6 +813,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
> >>>>           .get_vq_ready           = vdpasim_get_vq_ready,
> >>>>           .set_vq_state           = vdpasim_set_vq_state,
> >>>>           .get_vq_state           = vdpasim_get_vq_state,
> >>>> +       .get_vq_notification    = vdpasim_get_vq_notification,
> >>>>           .get_vq_align           = vdpasim_get_vq_align,
> >>>>           .get_vq_group           = vdpasim_get_vq_group,
> >>>>           .get_device_features    = vdpasim_get_device_features,
> >>>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> >>>> index 0e78737dcc16..0769ccbd3911 100644
> >>>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
> >>>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> >>>> @@ -69,6 +69,9 @@ struct vdpasim {
> >>>>           bool running;
> >>>>           /* spinlock to synchronize iommu table */
> >>>>           spinlock_t iommu_lock;
> >>>> +       /* dummy notify page */
> >>>> +       unsigned long notify;
> >>>> +       struct delayed_work notify_work;
> >>>>    };
> >>>>
> >>>>    struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
> >>>> --
> >>>> 2.23.0
> >>>>
> >>>
> >>> .
> >>
> >
> > .
>
  
Longpeng(Mike) Dec. 12, 2022, 3:54 a.m. UTC | #6
在 2022/12/12 11:40, Jason Wang 写道:
> On Mon, Dec 12, 2022 at 8:51 AM Longpeng (Mike, Cloud Infrastructure
> Service Product Dept.) <longpeng2@huawei.com> wrote:
>>
>>
>>
>> 在 2022/12/8 15:37, Jason Wang 写道:
>>> On Tue, Dec 6, 2022 at 4:46 PM Longpeng (Mike, Cloud Infrastructure
>>> Service Product Dept.) <longpeng2@huawei.com> wrote:
>>>>
>>>>
>>>>
>>>> 在 2022/12/6 14:59, Jason Wang 写道:
>>>>> On Mon, Dec 5, 2022 at 9:52 PM Longpeng(Mike) <longpeng2@huawei.com> wrote:
>>>>>>
>>>>>> From: Longpeng <longpeng2@huawei.com>
>>>>>>
>>>>>> Support doorbell mapping for vdpasim devices, then we can test the notify
>>>>>> passthrough feature even if there's no real hardware on hand.
>>>>>>
>>>>>> Allocates a dummy page which is used to emulate the notify page of the device,
>>>>>> all VQs share the same notify register  that initiated to 0xffff. A  periodic
>>>>>> work will check whether there're requests need to process ( the value of the
>>>>>> notify register is 0xffff or not ).
>>>>>>
>>>>>> This cap is disabled as default, users can enable it as follow:
>>>>>>      modprobe vdpa_sim notify_passthrough=true
>>>>>>
>>>>>> Signed-off-by: Longpeng <longpeng2@huawei.com>
>>>>>> ---
>>>>>> Changes v1->v2:
>>>>>>      - support both kick mode and passthrough mode. [Jason]
>>>>>>      - poll the notify register first. [Jason, Michael]
>>>>>> ---
>>>>>>     drivers/vdpa/vdpa_sim/vdpa_sim.c | 77 ++++++++++++++++++++++++++++++++
>>>>>>     drivers/vdpa/vdpa_sim/vdpa_sim.h |  3 ++
>>>>>>     2 files changed, 80 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>>>>> index 7438a89ce939..07096a04dabb 100644
>>>>>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>>>>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>>>>>> @@ -14,6 +14,7 @@
>>>>>>     #include <linux/slab.h>
>>>>>>     #include <linux/sched.h>
>>>>>>     #include <linux/dma-map-ops.h>
>>>>>> +#include <asm/set_memory.h>
>>>>>>     #include <linux/vringh.h>
>>>>>>     #include <linux/vdpa.h>
>>>>>>     #include <linux/vhost_iotlb.h>
>>>>>> @@ -36,9 +37,16 @@ module_param(max_iotlb_entries, int, 0444);
>>>>>>     MODULE_PARM_DESC(max_iotlb_entries,
>>>>>>                     "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
>>>>>>
>>>>>> +static bool notify_passthrough;
>>>>>> +module_param(notify_passthrough, bool, 0444);
>>>>>> +MODULE_PARM_DESC(notify_passthrough,
>>>>>> +                "Enable vq notify(doorbell) area mapping. (default: false)");
>>>>>
>>>>> I'm not sure if it's worth doing this, I think we can afford the cost
>>>>> of periodic work (especially considering it's a simulator).
>>>>>
>>>> We can support kick mode and passthgourh mode simultaneously now, it is
>>>> a little unnecessary. I would remove it in next version.
>>>>
>>>>>> +
>>>>>>     #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
>>>>>>     #define VDPASIM_QUEUE_MAX 256
>>>>>>     #define VDPASIM_VENDOR_ID 0
>>>>>> +#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */
>>>>>> +#define VDPASIM_NOTIFY_DEFVAL 0xffff
>>>>>>
>>>>>>     static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
>>>>>>     {
>>>>>> @@ -246,6 +254,28 @@ static const struct dma_map_ops vdpasim_dma_ops = {
>>>>>>     static const struct vdpa_config_ops vdpasim_config_ops;
>>>>>>     static const struct vdpa_config_ops vdpasim_batch_config_ops;
>>>>>>
>>>>>> +static void vdpasim_notify_work(struct work_struct *work)
>>>>>> +{
>>>>>> +       struct vdpasim *vdpasim;
>>>>>> +       u16 *val;
>>>>>> +
>>>>>> +       vdpasim = container_of(work, struct vdpasim, notify_work.work);
>>>>>> +
>>>>>> +       if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
>>>>>> +               goto out;
>>>>>> +
>>>>>> +       if (!vdpasim->running)
>>>>>> +               goto out;
>>>>>> +
>>>>>> +       val = (u16 *)vdpasim->notify;
>>>>>> +       if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL)
>>>>>> +               schedule_work(&vdpasim->work);
>>>>>> +
>>>>>> +out:
>>>>>> +       schedule_delayed_work(&vdpasim->notify_work,
>>>>>> +                             msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
>>>>>> +}
>>>>>> +
>>>>>>     struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>>>>>>                                   const struct vdpa_dev_set_config *config)
>>>>>>     {
>>>>>> @@ -287,6 +317,18 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
>>>>>>            set_dma_ops(dev, &vdpasim_dma_ops);
>>>>>>            vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
>>>>>>
>>>>>> +       if (notify_passthrough) {
>>>>>> +               INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work);
>>>>>> +
>>>>>> +               vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO);
>>>>>> +               if (!vdpasim->notify)
>>>>>> +                       goto err_iommu;
>>>>>> +#ifdef CONFIG_X86
>>>>>> +               set_memory_uc(vdpasim->notify, 1);
>>>>>> +#endif
>>>>>
>>>>> I had the same question with version 1, any reason for having this
>>>>> part uncacheable? It's a hint that we have bugs somewhere. Are we
>>>>> missing ACCESS/ORDER_PLATFORM or other features?
>>>>>
>>>> Oh, I had replied you in that thread.
>>>>
>>>> The vm_page_prot of notify mapping VMA is pgprot_noncached (see
>>>> vhost_vdpa_fault) but the vdpasim->notify is WB, so we should set its
>>>> memtype to UC here and set it back to WB when releasing the device.
>>>
>>> Ok, but any reason to have a X86 guard for this? This basically makes
>>> it won't work for non-x86 archs?
>>>
>> Because only X86 defines __HAVE_PFNMAP_TRACKING, track_pfn_remap does
>> nothing on other archs.
> 
> Ok, but this seems fragile, as we don't have a guarantee that it won't
> be implemented in the future.
> 
>>
>>> I think the correct way is to introduce a new config_ops to fetch the
>>> correct pgprot. Then we are fine.
>>>
>> Is it worth doing this?
> 
> I think it's worth since it:
> 
> 1) only require small tweak on the vdpa core
> 2) avoid arch specific code in the simulator
> 
>> It seems the vdpasim device is just a special
>> case (using RAM to emulates the notify area) and other real HW devices
>> work fine with the current implementation?
> 
> There's no guarantee that we won't have any other software based vDPA
> devices. We've already had a simulator and VDUSE. And we should not
> exclude the possibility of another one.
> 
Make sense.

Any suggestion for the name of this new config_ops? get_notify_pgprot? 
Or add a new field (pgrot) in vdpa_notification_area?

> Thanks
> 
>>
>>> Thanks
>>>
>>>>
>>>> Otherwise, the warning in trace_pfn_remap() would be triggered.
>>>> For example:
>>>> x86/PAT: CPU 16/KVM:17819 map pfn RAM range req uncached-minus for [mem
>>>> 0x5151f3000-0x5151f3fff], got write-back
>>>>
>>>>>> +               *(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL;
>>>>>
>>>>> WRITE_ONCE()?
>>>>>
>>>> Okay.
>>>>
>>>>>> +       }
>>>>>> +
>>>>>>            vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
>>>>>>            if (!vdpasim->config)
>>>>>>                    goto err_iommu;
>>>>>> @@ -495,6 +537,18 @@ static u8 vdpasim_get_status(struct vdpa_device *vdpa)
>>>>>>            return status;
>>>>>>     }
>>>>>>
>>>>>> +static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start)
>>>>>> +{
>>>>>> +       if (!notify_passthrough)
>>>>>> +               return;
>>>>>
>>>>> Only two callers for this function: one is start another is stop. If
>>>>> we decide to get rid of notify_passthrough, I'd rather opencode the
>>>>> schedule/cancel_delayed().
>>>>>
>>>> Okay, thanks.
>>>>
>>>>> Thanks
>>>>>
>>>>>> +
>>>>>> +       if (start)
>>>>>> +               schedule_delayed_work(&vdpasim->notify_work,
>>>>>> +                                     msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
>>>>>> +       else
>>>>>> +               cancel_delayed_work_sync(&vdpasim->notify_work);
>>>>>> +}
>>>>>> +
>>>>>>     static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
>>>>>>     {
>>>>>>            struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>>>>> @@ -502,12 +556,14 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
>>>>>>            spin_lock(&vdpasim->lock);
>>>>>>            vdpasim->status = status;
>>>>>>            spin_unlock(&vdpasim->lock);
>>>>>> +       vdpasim_set_notify_work(vdpasim, status & VIRTIO_CONFIG_S_DRIVER_OK);
>>>>>>     }
>>>>>>
>>>>>>     static int vdpasim_reset(struct vdpa_device *vdpa, bool clear)
>>>>>>     {
>>>>>>            struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>>>>>
>>>>>> +       vdpasim_set_notify_work(vdpasim, false);
>>>>>>            spin_lock(&vdpasim->lock);
>>>>>>            vdpasim->status = 0;
>>>>>>            vdpasim_do_reset(vdpasim);
>>>>>> @@ -672,11 +728,24 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
>>>>>>            return 0;
>>>>>>     }
>>>>>>
>>>>>> +static struct vdpa_notification_area
>>>>>> +vdpasim_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
>>>>>> +{
>>>>>> +       struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>>>>> +       struct vdpa_notification_area notify;
>>>>>> +
>>>>>> +       notify.addr = virt_to_phys((void *)vdpasim->notify);
>>>>>> +       notify.size = PAGE_SIZE;
>>>>>> +
>>>>>> +       return notify;
>>>>>> +}
>>>>>> +
>>>>>>     static void vdpasim_free(struct vdpa_device *vdpa)
>>>>>>     {
>>>>>>            struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>>>>>>            int i;
>>>>>>
>>>>>> +       vdpasim_set_notify_work(vdpasim, false);
>>>>>>            cancel_work_sync(&vdpasim->work);
>>>>>>
>>>>>>            for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
>>>>>> @@ -693,6 +762,12 @@ static void vdpasim_free(struct vdpa_device *vdpa)
>>>>>>            vhost_iotlb_free(vdpasim->iommu);
>>>>>>            kfree(vdpasim->vqs);
>>>>>>            kfree(vdpasim->config);
>>>>>> +       if (vdpasim->notify) {
>>>>>> +#ifdef CONFIG_X86
>>>>>> +               set_memory_wb(vdpasim->notify, 1);
>>>>>> +#endif
>>>>>> +               free_page(vdpasim->notify);
>>>>>> +       }
>>>>>>     }
>>>>>>
>>>>>>     static const struct vdpa_config_ops vdpasim_config_ops = {
>>>>>> @@ -704,6 +779,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
>>>>>>            .get_vq_ready           = vdpasim_get_vq_ready,
>>>>>>            .set_vq_state           = vdpasim_set_vq_state,
>>>>>>            .get_vq_state           = vdpasim_get_vq_state,
>>>>>> +       .get_vq_notification    = vdpasim_get_vq_notification,
>>>>>>            .get_vq_align           = vdpasim_get_vq_align,
>>>>>>            .get_vq_group           = vdpasim_get_vq_group,
>>>>>>            .get_device_features    = vdpasim_get_device_features,
>>>>>> @@ -737,6 +813,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
>>>>>>            .get_vq_ready           = vdpasim_get_vq_ready,
>>>>>>            .set_vq_state           = vdpasim_set_vq_state,
>>>>>>            .get_vq_state           = vdpasim_get_vq_state,
>>>>>> +       .get_vq_notification    = vdpasim_get_vq_notification,
>>>>>>            .get_vq_align           = vdpasim_get_vq_align,
>>>>>>            .get_vq_group           = vdpasim_get_vq_group,
>>>>>>            .get_device_features    = vdpasim_get_device_features,
>>>>>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>>>>>> index 0e78737dcc16..0769ccbd3911 100644
>>>>>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
>>>>>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>>>>>> @@ -69,6 +69,9 @@ struct vdpasim {
>>>>>>            bool running;
>>>>>>            /* spinlock to synchronize iommu table */
>>>>>>            spinlock_t iommu_lock;
>>>>>> +       /* dummy notify page */
>>>>>> +       unsigned long notify;
>>>>>> +       struct delayed_work notify_work;
>>>>>>     };
>>>>>>
>>>>>>     struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,
>>>>>> --
>>>>>> 2.23.0
>>>>>>
>>>>>
>>>>> .
>>>>
>>>
>>> .
>>
> 
> .
  

Patch

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 7438a89ce939..07096a04dabb 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -14,6 +14,7 @@ 
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/dma-map-ops.h>
+#include <asm/set_memory.h>
 #include <linux/vringh.h>
 #include <linux/vdpa.h>
 #include <linux/vhost_iotlb.h>
@@ -36,9 +37,16 @@  module_param(max_iotlb_entries, int, 0444);
 MODULE_PARM_DESC(max_iotlb_entries,
 		 "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
 
+static bool notify_passthrough;
+module_param(notify_passthrough, bool, 0444);
+MODULE_PARM_DESC(notify_passthrough,
+		 "Enable vq notify(doorbell) area mapping. (default: false)");
+
 #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
 #define VDPASIM_QUEUE_MAX 256
 #define VDPASIM_VENDOR_ID 0
+#define VDPASIM_VRING_POLL_PERIOD 100 /* ms */
+#define VDPASIM_NOTIFY_DEFVAL 0xffff
 
 static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
 {
@@ -246,6 +254,28 @@  static const struct dma_map_ops vdpasim_dma_ops = {
 static const struct vdpa_config_ops vdpasim_config_ops;
 static const struct vdpa_config_ops vdpasim_batch_config_ops;
 
+static void vdpasim_notify_work(struct work_struct *work)
+{
+	struct vdpasim *vdpasim;
+	u16 *val;
+
+	vdpasim = container_of(work, struct vdpasim, notify_work.work);
+
+	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
+		goto out;
+
+	if (!vdpasim->running)
+		goto out;
+
+	val = (u16 *)vdpasim->notify;
+	if (xchg(val, VDPASIM_NOTIFY_DEFVAL) != VDPASIM_NOTIFY_DEFVAL)
+		schedule_work(&vdpasim->work);
+
+out:
+	schedule_delayed_work(&vdpasim->notify_work,
+			      msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
+}
+
 struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
 			       const struct vdpa_dev_set_config *config)
 {
@@ -287,6 +317,18 @@  struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr,
 	set_dma_ops(dev, &vdpasim_dma_ops);
 	vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
 
+	if (notify_passthrough) {
+		INIT_DELAYED_WORK(&vdpasim->notify_work, vdpasim_notify_work);
+
+		vdpasim->notify = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+		if (!vdpasim->notify)
+			goto err_iommu;
+#ifdef CONFIG_X86
+		set_memory_uc(vdpasim->notify, 1);
+#endif
+		*(u16 *)vdpasim->notify = VDPASIM_NOTIFY_DEFVAL;
+	}
+
 	vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
 	if (!vdpasim->config)
 		goto err_iommu;
@@ -495,6 +537,18 @@  static u8 vdpasim_get_status(struct vdpa_device *vdpa)
 	return status;
 }
 
+static void vdpasim_set_notify_work(struct vdpasim *vdpasim, bool start)
+{
+	if (!notify_passthrough)
+		return;
+
+	if (start)
+		schedule_delayed_work(&vdpasim->notify_work,
+				      msecs_to_jiffies(VDPASIM_VRING_POLL_PERIOD));
+	else
+		cancel_delayed_work_sync(&vdpasim->notify_work);
+}
+
 static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
 {
 	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
@@ -502,12 +556,14 @@  static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status)
 	spin_lock(&vdpasim->lock);
 	vdpasim->status = status;
 	spin_unlock(&vdpasim->lock);
+	vdpasim_set_notify_work(vdpasim, status & VIRTIO_CONFIG_S_DRIVER_OK);
 }
 
 static int vdpasim_reset(struct vdpa_device *vdpa, bool clear)
 {
 	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
 
+	vdpasim_set_notify_work(vdpasim, false);
 	spin_lock(&vdpasim->lock);
 	vdpasim->status = 0;
 	vdpasim_do_reset(vdpasim);
@@ -672,11 +728,24 @@  static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
 	return 0;
 }
 
+static struct vdpa_notification_area
+vdpasim_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
+{
+	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+	struct vdpa_notification_area notify;
+
+	notify.addr = virt_to_phys((void *)vdpasim->notify);
+	notify.size = PAGE_SIZE;
+
+	return notify;
+}
+
 static void vdpasim_free(struct vdpa_device *vdpa)
 {
 	struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
 	int i;
 
+	vdpasim_set_notify_work(vdpasim, false);
 	cancel_work_sync(&vdpasim->work);
 
 	for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
@@ -693,6 +762,12 @@  static void vdpasim_free(struct vdpa_device *vdpa)
 	vhost_iotlb_free(vdpasim->iommu);
 	kfree(vdpasim->vqs);
 	kfree(vdpasim->config);
+	if (vdpasim->notify) {
+#ifdef CONFIG_X86
+		set_memory_wb(vdpasim->notify, 1);
+#endif
+		free_page(vdpasim->notify);
+	}
 }
 
 static const struct vdpa_config_ops vdpasim_config_ops = {
@@ -704,6 +779,7 @@  static const struct vdpa_config_ops vdpasim_config_ops = {
 	.get_vq_ready           = vdpasim_get_vq_ready,
 	.set_vq_state           = vdpasim_set_vq_state,
 	.get_vq_state           = vdpasim_get_vq_state,
+	.get_vq_notification    = vdpasim_get_vq_notification,
 	.get_vq_align           = vdpasim_get_vq_align,
 	.get_vq_group           = vdpasim_get_vq_group,
 	.get_device_features    = vdpasim_get_device_features,
@@ -737,6 +813,7 @@  static const struct vdpa_config_ops vdpasim_batch_config_ops = {
 	.get_vq_ready           = vdpasim_get_vq_ready,
 	.set_vq_state           = vdpasim_set_vq_state,
 	.get_vq_state           = vdpasim_get_vq_state,
+	.get_vq_notification    = vdpasim_get_vq_notification,
 	.get_vq_align           = vdpasim_get_vq_align,
 	.get_vq_group           = vdpasim_get_vq_group,
 	.get_device_features    = vdpasim_get_device_features,
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
index 0e78737dcc16..0769ccbd3911 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
@@ -69,6 +69,9 @@  struct vdpasim {
 	bool running;
 	/* spinlock to synchronize iommu table */
 	spinlock_t iommu_lock;
+	/* dummy notify page */
+	unsigned long notify;
+	struct delayed_work notify_work;
 };
 
 struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr,