[v2,05/11] vduse: Introduce bound workqueue for irq injection
Commit Message
This introduces a bound workqueue to support running
irq callback in a specified cpu.
Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)
Comments
On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
>
> This introduces a bound workqueue to support running
> irq callback in a specified cpu.
>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> ---
> drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> 1 file changed, 22 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 37809bfcb7ef..d126f3e32a20 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> struct vdpa_callback cb;
> struct work_struct inject;
> struct work_struct kick;
> + int irq_effective_cpu;
I wonder why it's a cpu number instead of a cpumask. The latter seems
more flexible, e.g when using NUMA.
> };
>
> struct vduse_dev;
> @@ -128,6 +129,7 @@ static struct class *vduse_class;
> static struct cdev vduse_ctrl_cdev;
> static struct cdev vduse_cdev;
> static struct workqueue_struct *vduse_irq_wq;
> +static struct workqueue_struct *vduse_irq_bound_wq;
>
> static u32 allowed_device_id[] = {
> VIRTIO_ID_BLOCK,
> @@ -917,7 +919,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> }
>
> static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> - struct work_struct *irq_work)
> + struct work_struct *irq_work,
> + int irq_effective_cpu)
> {
> int ret = -EINVAL;
>
> @@ -926,7 +929,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> goto unlock;
>
> ret = 0;
> - queue_work(vduse_irq_wq, irq_work);
> + if (irq_effective_cpu == -1)
Is it better to have a macro for this magic number?
Thanks
On Fri, Dec 16, 2022 at 12:02 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> >
> > This introduces a bound workqueue to support running
> > irq callback in a specified cpu.
> >
> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > ---
> > drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> > 1 file changed, 22 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > index 37809bfcb7ef..d126f3e32a20 100644
> > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> > struct vdpa_callback cb;
> > struct work_struct inject;
> > struct work_struct kick;
> > + int irq_effective_cpu;
>
> I wonder why it's a cpu number instead of a cpumask. The latter seems
> more flexible, e.g when using NUMA.
>
This variable represents the CPU that runs the interrupt callback
rather than CPU affinity.
> > };
> >
> > struct vduse_dev;
> > @@ -128,6 +129,7 @@ static struct class *vduse_class;
> > static struct cdev vduse_ctrl_cdev;
> > static struct cdev vduse_cdev;
> > static struct workqueue_struct *vduse_irq_wq;
> > +static struct workqueue_struct *vduse_irq_bound_wq;
> >
> > static u32 allowed_device_id[] = {
> > VIRTIO_ID_BLOCK,
> > @@ -917,7 +919,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> > }
> >
> > static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > - struct work_struct *irq_work)
> > + struct work_struct *irq_work,
> > + int irq_effective_cpu)
> > {
> > int ret = -EINVAL;
> >
> > @@ -926,7 +929,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > goto unlock;
> >
> > ret = 0;
> > - queue_work(vduse_irq_wq, irq_work);
> > + if (irq_effective_cpu == -1)
>
> Is it better to have a macro for this magic number?
>
It makes sense to me.
Thanks,
Yongji
On Mon, Dec 19, 2022 at 1:04 PM Yongji Xie <xieyongji@bytedance.com> wrote:
>
> On Fri, Dec 16, 2022 at 12:02 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> > >
> > > This introduces a bound workqueue to support running
> > > irq callback in a specified cpu.
> > >
> > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > ---
> > > drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> > > 1 file changed, 22 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > index 37809bfcb7ef..d126f3e32a20 100644
> > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> > > struct vdpa_callback cb;
> > > struct work_struct inject;
> > > struct work_struct kick;
> > > + int irq_effective_cpu;
> >
> > I wonder why it's a cpu number instead of a cpumask. The latter seems
> > more flexible, e.g when using NUMA.
> >
>
> This variable represents the CPU that runs the interrupt callback
> rather than CPU affinity.
Ok, but for some reason it only gets updated when a new affinity is set?
(Btw, I don't see how the code deals with cpu hotplug, do we need
cpuhot notifier?)
Thanks
>
> > > };
> > >
> > > struct vduse_dev;
> > > @@ -128,6 +129,7 @@ static struct class *vduse_class;
> > > static struct cdev vduse_ctrl_cdev;
> > > static struct cdev vduse_cdev;
> > > static struct workqueue_struct *vduse_irq_wq;
> > > +static struct workqueue_struct *vduse_irq_bound_wq;
> > >
> > > static u32 allowed_device_id[] = {
> > > VIRTIO_ID_BLOCK,
> > > @@ -917,7 +919,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> > > }
> > >
> > > static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > > - struct work_struct *irq_work)
> > > + struct work_struct *irq_work,
> > > + int irq_effective_cpu)
> > > {
> > > int ret = -EINVAL;
> > >
> > > @@ -926,7 +929,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > > goto unlock;
> > >
> > > ret = 0;
> > > - queue_work(vduse_irq_wq, irq_work);
> > > + if (irq_effective_cpu == -1)
> >
> > Is it better to have a macro for this magic number?
> >
>
> It makes sense to me.
>
> Thanks,
> Yongji
>
On Tue, Dec 20, 2022 at 2:28 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Mon, Dec 19, 2022 at 1:04 PM Yongji Xie <xieyongji@bytedance.com> wrote:
> >
> > On Fri, Dec 16, 2022 at 12:02 PM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> > > >
> > > > This introduces a bound workqueue to support running
> > > > irq callback in a specified cpu.
> > > >
> > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > > ---
> > > > drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> > > > 1 file changed, 22 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > index 37809bfcb7ef..d126f3e32a20 100644
> > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> > > > struct vdpa_callback cb;
> > > > struct work_struct inject;
> > > > struct work_struct kick;
> > > > + int irq_effective_cpu;
> > >
> > > I wonder why it's a cpu number instead of a cpumask. The latter seems
> > > more flexible, e.g when using NUMA.
> > >
> >
> > This variable represents the CPU that runs the interrupt callback
> > rather than CPU affinity.
>
> Ok, but for some reason it only gets updated when a new affinity is set?
>
Yes, since we don't use round-robin now. And if affinity is not set,
we rollback to the default behavior (use un-bounded workqueue to run
irq callback).
> (Btw, I don't see how the code deals with cpu hotplug, do we need
> cpuhot notifier?)
>
Currently the queue_work_on() can handle the cpu hotplug case, so I
think we can simply check whether the CPU is online each time queuing
the kwork, then update the affinity if needed.
Thanks,
Yongji
On Tue, Dec 20, 2022 at 6:02 PM Yongji Xie <xieyongji@bytedance.com> wrote:
>
> On Tue, Dec 20, 2022 at 2:28 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Mon, Dec 19, 2022 at 1:04 PM Yongji Xie <xieyongji@bytedance.com> wrote:
> > >
> > > On Fri, Dec 16, 2022 at 12:02 PM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Mon, Dec 5, 2022 at 4:44 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> > > > >
> > > > > This introduces a bound workqueue to support running
> > > > > irq callback in a specified cpu.
> > > > >
> > > > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > > > ---
> > > > > drivers/vdpa/vdpa_user/vduse_dev.c | 29 ++++++++++++++++++++++-------
> > > > > 1 file changed, 22 insertions(+), 7 deletions(-)
> > > > >
> > > > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > > index 37809bfcb7ef..d126f3e32a20 100644
> > > > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > > > @@ -57,6 +57,7 @@ struct vduse_virtqueue {
> > > > > struct vdpa_callback cb;
> > > > > struct work_struct inject;
> > > > > struct work_struct kick;
> > > > > + int irq_effective_cpu;
> > > >
> > > > I wonder why it's a cpu number instead of a cpumask. The latter seems
> > > > more flexible, e.g when using NUMA.
> > > >
> > >
> > > This variable represents the CPU that runs the interrupt callback
> > > rather than CPU affinity.
> >
> > Ok, but for some reason it only gets updated when a new affinity is set?
> >
>
> Yes, since we don't use round-robin now. And if affinity is not set,
> we rollback to the default behavior (use un-bounded workqueue to run
> irq callback).
>
> > (Btw, I don't see how the code deals with cpu hotplug, do we need
> > cpuhot notifier?)
> >
>
> Currently the queue_work_on() can handle the cpu hotplug case, so I
> think we can simply check whether the CPU is online each time queuing
> the kwork, then update the affinity if needed.
Right.
Thanks
>
> Thanks,
> Yongji
>
@@ -57,6 +57,7 @@ struct vduse_virtqueue {
struct vdpa_callback cb;
struct work_struct inject;
struct work_struct kick;
+ int irq_effective_cpu;
};
struct vduse_dev;
@@ -128,6 +129,7 @@ static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
static u32 allowed_device_id[] = {
VIRTIO_ID_BLOCK,
@@ -917,7 +919,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
}
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
- struct work_struct *irq_work)
+ struct work_struct *irq_work,
+ int irq_effective_cpu)
{
int ret = -EINVAL;
@@ -926,7 +929,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
goto unlock;
ret = 0;
- queue_work(vduse_irq_wq, irq_work);
+ if (irq_effective_cpu == -1)
+ queue_work(vduse_irq_wq, irq_work);
+ else
+ queue_work_on(irq_effective_cpu,
+ vduse_irq_bound_wq, irq_work);
unlock:
up_read(&dev->rwsem);
@@ -1111,7 +1118,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
}
case VDUSE_DEV_INJECT_CONFIG_IRQ:
- ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+ ret = vduse_dev_queue_irq_work(dev, &dev->inject, -1);
break;
case VDUSE_VQ_SETUP: {
struct vduse_vq_config config;
@@ -1198,7 +1205,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(index, dev->vq_num);
- ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
+ ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
+ dev->vqs[index]->irq_effective_cpu);
break;
}
case VDUSE_IOTLB_REG_UMEM: {
@@ -1367,6 +1375,7 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
goto err;
dev->vqs[i]->index = i;
+ dev->vqs[i]->irq_effective_cpu = -1;
INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
spin_lock_init(&dev->vqs[i]->kick_lock);
@@ -1855,12 +1864,15 @@ static int vduse_init(void)
if (ret)
goto err_cdev;
+ ret = -ENOMEM;
vduse_irq_wq = alloc_workqueue("vduse-irq",
WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
- if (!vduse_irq_wq) {
- ret = -ENOMEM;
+ if (!vduse_irq_wq)
goto err_wq;
- }
+
+ vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+ if (!vduse_irq_bound_wq)
+ goto err_bound_wq;
ret = vduse_domain_init();
if (ret)
@@ -1874,6 +1886,8 @@ static int vduse_init(void)
err_mgmtdev:
vduse_domain_exit();
err_domain:
+ destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
destroy_workqueue(vduse_irq_wq);
err_wq:
cdev_del(&vduse_cdev);
@@ -1893,6 +1907,7 @@ static void vduse_exit(void)
{
vduse_mgmtdev_exit();
vduse_domain_exit();
+ destroy_workqueue(vduse_irq_bound_wq);
destroy_workqueue(vduse_irq_wq);
cdev_del(&vduse_cdev);
device_destroy(vduse_class, vduse_major);