[v2,05/11] vduse: Introduce bound workqueue for irq injection

Message ID 20221205084127.535-6-xieyongji@bytedance.com
State New
Headers
Series VDUSE: Improve performance |

Commit Message

Yongji Xie Dec. 5, 2022, 8:41 a.m. UTC
  This introduces a bound workqueue to support running
irq callback in a specified cpu.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
 drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)
  

Comments

Jason Wang Dec. 16, 2022, 4:02 a.m. UTC | #1
On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
>
> This introduces a bound workqueue to support running
> irq callback in a specified cpu.
>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> ---
>  drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
>  1 file changed, 22 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 37809bfcb7ef..d126f3e32a20 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -57,6 +57,7 @@ struct vduse_virtqueue {
>         struct vdpa_callback cb;
>         struct work_struct inject;
>         struct work_struct kick;
> +       int irq_effective_cpu;

I wonder why it's a cpu number instead of a cpumask. The latter seems
more flexible, e.g when using NUMA.

>  };
>
>  struct vduse_dev;
> @@ -128,6 +129,7 @@ static struct class *vduse_class;
>  static struct cdev vduse_ctrl_cdev;
>  static struct cdev vduse_cdev;
>  static struct workqueue_struct *vduse_irq_wq;
> +static struct workqueue_struct *vduse_irq_bound_wq;
>
>  static u32 allowed_device_id[] = {
>         VIRTIO_ID_BLOCK,
> @@ -917,7 +919,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
>  }
>
>  static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> -                                   struct work_struct *irq_work)
> +                                   struct work_struct *irq_work,
> +                                   int irq_effective_cpu)
>  {
>         int ret = -EINVAL;
>
> @@ -926,7 +929,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
>                 goto unlock;
>
>         ret = 0;
> -       queue_work(vduse_irq_wq, irq_work);
> +       if (irq_effective_cpu == -1)

Is it better to have a macro for this magic number?

Thanks
  
Yongji Xie Dec. 19, 2022, 5:04 a.m. UTC | #2
On Fri, Dec 16, 2022 at 12:02 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> >
> > This introduces a bound workqueue to support running
> > irq callback in a specified cpu.
> >
> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > ---
> >  drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> >  1 file changed, 22 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > index 37809bfcb7ef..d126f3e32a20 100644
> > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> >         struct vdpa_callback cb;
> >         struct work_struct inject;
> >         struct work_struct kick;
> > +       int irq_effective_cpu;
>
> I wonder why it's a cpu number instead of a cpumask. The latter seems
> more flexible, e.g when using NUMA.
>

This variable represents the CPU that runs the interrupt callback
rather than CPU affinity.

> >  };
> >
> >  struct vduse_dev;
> > @@ -128,6 +129,7 @@ static struct class *vduse_class;
> >  static struct cdev vduse_ctrl_cdev;
> >  static struct cdev vduse_cdev;
> >  static struct workqueue_struct *vduse_irq_wq;
> > +static struct workqueue_struct *vduse_irq_bound_wq;
> >
> >  static u32 allowed_device_id[] = {
> >         VIRTIO_ID_BLOCK,
> > @@ -917,7 +919,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> >  }
> >
> >  static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > -                                   struct work_struct *irq_work)
> > +                                   struct work_struct *irq_work,
> > +                                   int irq_effective_cpu)
> >  {
> >         int ret = -EINVAL;
> >
> > @@ -926,7 +929,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> >                 goto unlock;
> >
> >         ret = 0;
> > -       queue_work(vduse_irq_wq, irq_work);
> > +       if (irq_effective_cpu == -1)
>
> Is it better to have a macro for this magic number?
>

It makes sense to me.

Thanks,
Yongji
  
Jason Wang Dec. 20, 2022, 6:27 a.m. UTC | #3
On Mon, Dec 19, 2022 at 1:04 PM Yongji Xie <xieyongji@bytedance.com> wrote:
>
> On Fri, Dec 16, 2022 at 12:02 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> > >
> > > This introduces a bound workqueue to support running
> > > irq callback in a specified cpu.
> > >
> > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > ---
> > >  drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> > >  1 file changed, 22 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > index 37809bfcb7ef..d126f3e32a20 100644
> > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> > >         struct vdpa_callback cb;
> > >         struct work_struct inject;
> > >         struct work_struct kick;
> > > +       int irq_effective_cpu;
> >
> > I wonder why it's a cpu number instead of a cpumask. The latter seems
> > more flexible, e.g when using NUMA.
> >
>
> This variable represents the CPU that runs the interrupt callback
> rather than CPU affinity.

Ok, but for some reason it only gets updated when a new affinity is set?

(Btw, I don't see how the code deals with cpu hotplug, do we need
cpuhot notifier?)

Thanks

>
> > >  };
> > >
> > >  struct vduse_dev;
> > > @@ -128,6 +129,7 @@ static struct class *vduse_class;
> > >  static struct cdev vduse_ctrl_cdev;
> > >  static struct cdev vduse_cdev;
> > >  static struct workqueue_struct *vduse_irq_wq;
> > > +static struct workqueue_struct *vduse_irq_bound_wq;
> > >
> > >  static u32 allowed_device_id[] = {
> > >         VIRTIO_ID_BLOCK,
> > > @@ -917,7 +919,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> > >  }
> > >
> > >  static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > > -                                   struct work_struct *irq_work)
> > > +                                   struct work_struct *irq_work,
> > > +                                   int irq_effective_cpu)
> > >  {
> > >         int ret = -EINVAL;
> > >
> > > @@ -926,7 +929,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > >                 goto unlock;
> > >
> > >         ret = 0;
> > > -       queue_work(vduse_irq_wq, irq_work);
> > > +       if (irq_effective_cpu == -1)
> >
> > Is it better to have a macro for this magic number?
> >
>
> It makes sense to me.
>
> Thanks,
> Yongji
>
  
Yongji Xie Dec. 20, 2022, 10:01 a.m. UTC | #4
On Tue, Dec 20, 2022 at 2:28 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Dec 19, 2022 at 1:04 PM Yongji Xie <xieyongji@bytedance.com> wrote:
> >
> > On Fri, Dec 16, 2022 at 12:02 PM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> > > >
> > > > This introduces a bound workqueue to support running
> > > > irq callback in a specified cpu.
> > > >
> > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > > ---
> > > >  drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> > > >  1 file changed, 22 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > index 37809bfcb7ef..d126f3e32a20 100644
> > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> > > >         struct vdpa_callback cb;
> > > >         struct work_struct inject;
> > > >         struct work_struct kick;
> > > > +       int irq_effective_cpu;
> > >
> > > I wonder why it's a cpu number instead of a cpumask. The latter seems
> > > more flexible, e.g when using NUMA.
> > >
> >
> > This variable represents the CPU that runs the interrupt callback
> > rather than CPU affinity.
>
> Ok, but for some reason it only gets updated when a new affinity is set?
>

Yes, since we don't use round-robin now. And if affinity is not set,
we rollback to the default behavior (use un-bounded workqueue to run
irq callback).

> (Btw, I don't see how the code deals with cpu hotplug, do we need
> cpuhot notifier?)
>

Currently the queue_work_on() can handle the cpu hotplug case, so I
think we can simply check whether the CPU is online each time queuing
the kwork, then update the affinity if needed.

Thanks,
Yongji
  
Jason Wang Dec. 21, 2022, 3:19 a.m. UTC | #5
On Tue, Dec 20, 2022 at 6:02 PM Yongji Xie <xieyongji@bytedance.com> wrote:
>
> On Tue, Dec 20, 2022 at 2:28 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Mon, Dec 19, 2022 at 1:04 PM Yongji Xie <xieyongji@bytedance.com> wrote:
> > >
> > > On Fri, Dec 16, 2022 at 12:02 PM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> > > > >
> > > > > This introduces a bound workqueue to support running
> > > > > irq callback in a specified cpu.
> > > > >
> > > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > > > ---
> > > > >  drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> > > > >  1 file changed, 22 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > > index 37809bfcb7ef..d126f3e32a20 100644
> > > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > > @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> > > > >         struct vdpa_callback cb;
> > > > >         struct work_struct inject;
> > > > >         struct work_struct kick;
> > > > > +       int irq_effective_cpu;
> > > >
> > > > I wonder why it's a cpu number instead of a cpumask. The latter seems
> > > > more flexible, e.g when using NUMA.
> > > >
> > >
> > > This variable represents the CPU that runs the interrupt callback
> > > rather than CPU affinity.
> >
> > Ok, but for some reason it only gets updated when a new affinity is set?
> >
>
> Yes, since we don't use round-robin now. And if affinity is not set,
> we rollback to the default behavior (use un-bounded workqueue to run
> irq callback).
>
> > (Btw, I don't see how the code deals with cpu hotplug, do we need
> > cpuhot notifier?)
> >
>
> Currently the queue_work_on() can handle the cpu hotplug case, so I
> think we can simply check whether the CPU is online each time queuing
> the kwork, then update the affinity if needed.

Right.

Thanks

>
> Thanks,
> Yongji
>
  

Patch

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 37809bfcb7ef..d126f3e32a20 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -57,6 +57,7 @@  struct vduse_virtqueue {
 	struct vdpa_callback cb;
 	struct work_struct inject;
 	struct work_struct kick;
+	int irq_effective_cpu;
 };
 
 struct vduse_dev;
@@ -128,6 +129,7 @@  static struct class *vduse_class;
 static struct cdev vduse_ctrl_cdev;
 static struct cdev vduse_cdev;
 static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
 
 static u32 allowed_device_id[] = {
 	VIRTIO_ID_BLOCK,
@@ -917,7 +919,8 @@  static void vduse_vq_irq_inject(struct work_struct *work)
 }
 
 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
-				    struct work_struct *irq_work)
+				    struct work_struct *irq_work,
+				    int irq_effective_cpu)
 {
 	int ret = -EINVAL;
 
@@ -926,7 +929,11 @@  static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
 		goto unlock;
 
 	ret = 0;
-	queue_work(vduse_irq_wq, irq_work);
+	if (irq_effective_cpu == -1)
+		queue_work(vduse_irq_wq, irq_work);
+	else
+		queue_work_on(irq_effective_cpu,
+			      vduse_irq_bound_wq, irq_work);
 unlock:
 	up_read(&dev->rwsem);
 
@@ -1111,7 +1118,7 @@  static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 		break;
 	}
 	case VDUSE_DEV_INJECT_CONFIG_IRQ:
-		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+		ret = vduse_dev_queue_irq_work(dev, &dev->inject, -1);
 		break;
 	case VDUSE_VQ_SETUP: {
 		struct vduse_vq_config config;
@@ -1198,7 +1205,8 @@  static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 			break;
 
 		index = array_index_nospec(index, dev->vq_num);
-		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
+		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
+					dev->vqs[index]->irq_effective_cpu);
 		break;
 	}
 	case VDUSE_IOTLB_REG_UMEM: {
@@ -1367,6 +1375,7 @@  static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
 			goto err;
 
 		dev->vqs[i]->index = i;
+		dev->vqs[i]->irq_effective_cpu = -1;
 		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
 		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
 		spin_lock_init(&dev->vqs[i]->kick_lock);
@@ -1855,12 +1864,15 @@  static int vduse_init(void)
 	if (ret)
 		goto err_cdev;
 
+	ret = -ENOMEM;
 	vduse_irq_wq = alloc_workqueue("vduse-irq",
 				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
-	if (!vduse_irq_wq) {
-		ret = -ENOMEM;
+	if (!vduse_irq_wq)
 		goto err_wq;
-	}
+
+	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+	if (!vduse_irq_bound_wq)
+		goto err_bound_wq;
 
 	ret = vduse_domain_init();
 	if (ret)
@@ -1874,6 +1886,8 @@  static int vduse_init(void)
 err_mgmtdev:
 	vduse_domain_exit();
 err_domain:
+	destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
 	destroy_workqueue(vduse_irq_wq);
 err_wq:
 	cdev_del(&vduse_cdev);
@@ -1893,6 +1907,7 @@  static void vduse_exit(void)
 {
 	vduse_mgmtdev_exit();
 	vduse_domain_exit();
+	destroy_workqueue(vduse_irq_bound_wq);
 	destroy_workqueue(vduse_irq_wq);
 	cdev_del(&vduse_cdev);
 	device_destroy(vduse_class, vduse_major);