[v4,05/11] vduse: Support set_vq_affinity callback
Commit Message
Since virtio-vdpa bus driver already support interrupt
affinity spreading mechanism, let's implement the
set_vq_affinity callback to bring it to vduse device.
After we get the virtqueue's affinity, we can spread
IRQs between CPUs in the affinity mask, in a round-robin
manner, to run the irq callback.
Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 61 ++++++++++++++++++++++++++----
1 file changed, 54 insertions(+), 7 deletions(-)
Comments
在 2023/3/23 13:30, Xie Yongji 写道:
> Since virtio-vdpa bus driver already support interrupt
> affinity spreading mechanism, let's implement the
> set_vq_affinity callback to bring it to vduse device.
> After we get the virtqueue's affinity, we can spread
> IRQs between CPUs in the affinity mask, in a round-robin
> manner, to run the irq callback.
>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Thanks
> ---
> drivers/vdpa/vdpa_user/vduse_dev.c | 61 ++++++++++++++++++++++++++----
> 1 file changed, 54 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 98359d87a06f..45aa8703c4b5 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -41,6 +41,8 @@
> #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
> #define VDUSE_MSG_DEFAULT_TIMEOUT 30
>
> +#define IRQ_UNBOUND -1
> +
> struct vduse_virtqueue {
> u16 index;
> u16 num_max;
> @@ -57,6 +59,8 @@ struct vduse_virtqueue {
> struct vdpa_callback cb;
> struct work_struct inject;
> struct work_struct kick;
> + int irq_effective_cpu;
> + struct cpumask irq_affinity;
> };
>
> struct vduse_dev;
> @@ -128,6 +132,7 @@ static struct class *vduse_class;
> static struct cdev vduse_ctrl_cdev;
> static struct cdev vduse_cdev;
> static struct workqueue_struct *vduse_irq_wq;
> +static struct workqueue_struct *vduse_irq_bound_wq;
>
> static u32 allowed_device_id[] = {
> VIRTIO_ID_BLOCK,
> @@ -708,6 +713,15 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
> return dev->generation;
> }
>
> +static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
> + const struct cpumask *cpu_mask)
> +{
> + struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> +
> + cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
> + return 0;
> +}
> +
> static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
> unsigned int asid,
> struct vhost_iotlb *iotlb)
> @@ -758,6 +772,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
> .get_config = vduse_vdpa_get_config,
> .set_config = vduse_vdpa_set_config,
> .get_generation = vduse_vdpa_get_generation,
> + .set_vq_affinity = vduse_vdpa_set_vq_affinity,
> .reset = vduse_vdpa_reset,
> .set_map = vduse_vdpa_set_map,
> .free = vduse_vdpa_free,
> @@ -917,7 +932,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> }
>
> static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> - struct work_struct *irq_work)
> + struct work_struct *irq_work,
> + int irq_effective_cpu)
> {
> int ret = -EINVAL;
>
> @@ -926,7 +942,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> goto unlock;
>
> ret = 0;
> - queue_work(vduse_irq_wq, irq_work);
> + if (irq_effective_cpu == IRQ_UNBOUND)
> + queue_work(vduse_irq_wq, irq_work);
> + else
> + queue_work_on(irq_effective_cpu,
> + vduse_irq_bound_wq, irq_work);
> unlock:
> up_read(&dev->rwsem);
>
> @@ -1029,6 +1049,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
> return ret;
> }
>
> +static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
> +{
> + int curr_cpu = vq->irq_effective_cpu;
> +
> + while (true) {
> + curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
> + if (cpu_online(curr_cpu))
> + break;
> +
> + if (curr_cpu >= nr_cpu_ids)
> + curr_cpu = IRQ_UNBOUND;
> + }
> +
> + vq->irq_effective_cpu = curr_cpu;
> +}
> +
> static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> unsigned long arg)
> {
> @@ -1111,7 +1147,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> break;
> }
> case VDUSE_DEV_INJECT_CONFIG_IRQ:
> - ret = vduse_dev_queue_irq_work(dev, &dev->inject);
> + ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
> break;
> case VDUSE_VQ_SETUP: {
> struct vduse_vq_config config;
> @@ -1198,7 +1234,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> break;
>
> index = array_index_nospec(index, dev->vq_num);
> - ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
> +
> + vduse_vq_update_effective_cpu(dev->vqs[index]);
> + ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
> + dev->vqs[index]->irq_effective_cpu);
> break;
> }
> case VDUSE_IOTLB_REG_UMEM: {
> @@ -1367,10 +1406,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
> goto err;
>
> dev->vqs[i]->index = i;
> + dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
> INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
> INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
> spin_lock_init(&dev->vqs[i]->kick_lock);
> spin_lock_init(&dev->vqs[i]->irq_lock);
> + cpumask_setall(&dev->vqs[i]->irq_affinity);
> }
>
> return 0;
> @@ -1858,12 +1899,15 @@ static int vduse_init(void)
> if (ret)
> goto err_cdev;
>
> + ret = -ENOMEM;
> vduse_irq_wq = alloc_workqueue("vduse-irq",
> WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
> - if (!vduse_irq_wq) {
> - ret = -ENOMEM;
> + if (!vduse_irq_wq)
> goto err_wq;
> - }
> +
> + vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
> + if (!vduse_irq_bound_wq)
> + goto err_bound_wq;
>
> ret = vduse_domain_init();
> if (ret)
> @@ -1877,6 +1921,8 @@ static int vduse_init(void)
> err_mgmtdev:
> vduse_domain_exit();
> err_domain:
> + destroy_workqueue(vduse_irq_bound_wq);
> +err_bound_wq:
> destroy_workqueue(vduse_irq_wq);
> err_wq:
> cdev_del(&vduse_cdev);
> @@ -1896,6 +1942,7 @@ static void vduse_exit(void)
> {
> vduse_mgmtdev_exit();
> vduse_domain_exit();
> + destroy_workqueue(vduse_irq_bound_wq);
> destroy_workqueue(vduse_irq_wq);
> cdev_del(&vduse_cdev);
> device_destroy(vduse_class, vduse_major);
@@ -41,6 +41,8 @@
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30
+#define IRQ_UNBOUND -1
+
struct vduse_virtqueue {
u16 index;
u16 num_max;
@@ -57,6 +59,8 @@ struct vduse_virtqueue {
struct vdpa_callback cb;
struct work_struct inject;
struct work_struct kick;
+ int irq_effective_cpu;
+ struct cpumask irq_affinity;
};
struct vduse_dev;
@@ -128,6 +132,7 @@ static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
static u32 allowed_device_id[] = {
VIRTIO_ID_BLOCK,
@@ -708,6 +713,15 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
return dev->generation;
}
+static int vduse_vdpa_set_vq_affinity(struct vdpa_device *vdpa, u16 idx,
+ const struct cpumask *cpu_mask)
+{
+ struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+ cpumask_copy(&dev->vqs[idx]->irq_affinity, cpu_mask);
+ return 0;
+}
+
static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
unsigned int asid,
struct vhost_iotlb *iotlb)
@@ -758,6 +772,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
.get_config = vduse_vdpa_get_config,
.set_config = vduse_vdpa_set_config,
.get_generation = vduse_vdpa_get_generation,
+ .set_vq_affinity = vduse_vdpa_set_vq_affinity,
.reset = vduse_vdpa_reset,
.set_map = vduse_vdpa_set_map,
.free = vduse_vdpa_free,
@@ -917,7 +932,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
}
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
- struct work_struct *irq_work)
+ struct work_struct *irq_work,
+ int irq_effective_cpu)
{
int ret = -EINVAL;
@@ -926,7 +942,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
goto unlock;
ret = 0;
- queue_work(vduse_irq_wq, irq_work);
+ if (irq_effective_cpu == IRQ_UNBOUND)
+ queue_work(vduse_irq_wq, irq_work);
+ else
+ queue_work_on(irq_effective_cpu,
+ vduse_irq_bound_wq, irq_work);
unlock:
up_read(&dev->rwsem);
@@ -1029,6 +1049,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
return ret;
}
+static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
+{
+ int curr_cpu = vq->irq_effective_cpu;
+
+ while (true) {
+ curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
+ if (cpu_online(curr_cpu))
+ break;
+
+ if (curr_cpu >= nr_cpu_ids)
+ curr_cpu = IRQ_UNBOUND;
+ }
+
+ vq->irq_effective_cpu = curr_cpu;
+}
+
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1111,7 +1147,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
}
case VDUSE_DEV_INJECT_CONFIG_IRQ:
- ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+ ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
break;
case VDUSE_VQ_SETUP: {
struct vduse_vq_config config;
@@ -1198,7 +1234,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(index, dev->vq_num);
- ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
+
+ vduse_vq_update_effective_cpu(dev->vqs[index]);
+ ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
+ dev->vqs[index]->irq_effective_cpu);
break;
}
case VDUSE_IOTLB_REG_UMEM: {
@@ -1367,10 +1406,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
goto err;
dev->vqs[i]->index = i;
+ dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
spin_lock_init(&dev->vqs[i]->kick_lock);
spin_lock_init(&dev->vqs[i]->irq_lock);
+ cpumask_setall(&dev->vqs[i]->irq_affinity);
}
return 0;
@@ -1858,12 +1899,15 @@ static int vduse_init(void)
if (ret)
goto err_cdev;
+ ret = -ENOMEM;
vduse_irq_wq = alloc_workqueue("vduse-irq",
WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
- if (!vduse_irq_wq) {
- ret = -ENOMEM;
+ if (!vduse_irq_wq)
goto err_wq;
- }
+
+ vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+ if (!vduse_irq_bound_wq)
+ goto err_bound_wq;
ret = vduse_domain_init();
if (ret)
@@ -1877,6 +1921,8 @@ static int vduse_init(void)
err_mgmtdev:
vduse_domain_exit();
err_domain:
+ destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
destroy_workqueue(vduse_irq_wq);
err_wq:
cdev_del(&vduse_cdev);
@@ -1896,6 +1942,7 @@ static void vduse_exit(void)
{
vduse_mgmtdev_exit();
vduse_domain_exit();
+ destroy_workqueue(vduse_irq_bound_wq);
destroy_workqueue(vduse_irq_wq);
cdev_del(&vduse_cdev);
device_destroy(vduse_class, vduse_major);