[v3,05/11] vduse: Support automatic irq callback affinity
Commit Message
This brings the current interrupt affinity spreading mechanism
to the vduse device. We make use of group_cpus_evenly()
to create an irq callback affinity mask for each virtqueue of the
vduse device. Then we spread IRQs between the CPUs in the affinity
mask, in a round-robin manner, to run the irq callback.
Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++--
1 file changed, 123 insertions(+), 7 deletions(-)
Comments
Hi Xie,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on tip/irq/core]
[also build test WARNING on linus/master next-20230228]
[cannot apply to mst-vhost/linux-next v6.2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xie-Yongji/lib-group_cpus-Export-group_cpus_evenly/20230228-174438
patch link: https://lore.kernel.org/r/20230228094110.37-6-xieyongji%40bytedance.com
patch subject: [PATCH v3 05/11] vduse: Support automatic irq callback affinity
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230228/202302281954.jRA7Qzq4-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/6c15cc28cb814c0e6cb80955bc59517e80c15ae2
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Xie-Yongji/lib-group_cpus-Export-group_cpus_evenly/20230228-174438
git checkout 6c15cc28cb814c0e6cb80955bc59517e80c15ae2
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash drivers/vdpa/vdpa_user/
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302281954.jRA7Qzq4-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/vdpa/vdpa_user/vduse_dev.c:725:1: warning: no previous prototype for 'create_affinity_masks' [-Wmissing-prototypes]
725 | create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
| ^~~~~~~~~~~~~~~~~~~~~
vim +/create_affinity_masks +725 drivers/vdpa/vdpa_user/vduse_dev.c
723
724 struct cpumask *
> 725 create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
726 {
727 unsigned int affvecs = 0, curvec, usedvecs, i;
728 struct cpumask *masks = NULL;
729
730 if (nvecs > affd->pre_vectors + affd->post_vectors)
731 affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
732
733 if (!affd->calc_sets)
734 affd->calc_sets = default_calc_sets;
735
736 affd->calc_sets(affd, affvecs);
737
738 if (!affvecs)
739 return NULL;
740
741 masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
742 if (!masks)
743 return NULL;
744
745 /* Fill out vectors at the beginning that don't need affinity */
746 for (curvec = 0; curvec < affd->pre_vectors; curvec++)
747 cpumask_setall(&masks[curvec]);
748
749 for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
750 unsigned int this_vecs = affd->set_size[i];
751 int j;
752 struct cpumask *result = group_cpus_evenly(this_vecs);
753
754 if (!result) {
755 kfree(masks);
756 return NULL;
757 }
758
759 for (j = 0; j < this_vecs; j++)
760 cpumask_copy(&masks[curvec + j], &result[j]);
761 kfree(result);
762
763 curvec += this_vecs;
764 usedvecs += this_vecs;
765 }
766
767 /* Fill out vectors at the end that don't need affinity */
768 if (usedvecs >= affvecs)
769 curvec = affd->pre_vectors + affvecs;
770 else
771 curvec = affd->pre_vectors + usedvecs;
772 for (; curvec < nvecs; curvec++)
773 cpumask_setall(&masks[curvec]);
774
775 return masks;
776 }
777
Hi Xie,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on tip/irq/core]
[also build test WARNING on linus/master next-20230228]
[cannot apply to mst-vhost/linux-next hch-configfs/for-next v6.2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xie-Yongji/lib-group_cpus-Export-group_cpus_evenly/20230228-174438
patch link: https://lore.kernel.org/r/20230228094110.37-6-xieyongji%40bytedance.com
patch subject: [PATCH v3 05/11] vduse: Support automatic irq callback affinity
config: x86_64-randconfig-s021 (https://download.01.org/0day-ci/archive/20230301/202303010802.fyGx4T0d-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce:
# apt-get install sparse
# sparse version: v0.6.4-39-gce1a6720-dirty
# https://github.com/intel-lab-lkp/linux/commit/6c15cc28cb814c0e6cb80955bc59517e80c15ae2
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Xie-Yongji/lib-group_cpus-Export-group_cpus_evenly/20230228-174438
git checkout 6c15cc28cb814c0e6cb80955bc59517e80c15ae2
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 olddefconfig
make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/vdpa/vdpa_user/
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303010802.fyGx4T0d-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
>> drivers/vdpa/vdpa_user/vduse_dev.c:724:16: sparse: sparse: symbol 'create_affinity_masks' was not declared. Should it be static?
在 2023/2/28 17:41, Xie Yongji 写道:
> This brings current interrupt affinity spreading mechanism
> to vduse device. We will make use of group_cpus_evenly()
> to create an irq callback affinity mask for each virtqueue of
> vduse device. Then we will spread IRQs between CPUs in the affinity
> mask, in a round-robin manner, to run the irq callback.
>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> ---
> drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++--
> 1 file changed, 123 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 98359d87a06f..bde28a8692d5 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -23,6 +23,8 @@
> #include <linux/nospec.h>
> #include <linux/vmalloc.h>
> #include <linux/sched/mm.h>
> +#include <linux/interrupt.h>
> +#include <linux/group_cpus.h>
> #include <uapi/linux/vduse.h>
> #include <uapi/linux/vdpa.h>
> #include <uapi/linux/virtio_config.h>
> @@ -41,6 +43,8 @@
> #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
> #define VDUSE_MSG_DEFAULT_TIMEOUT 30
>
> +#define IRQ_UNBOUND -1
> +
> struct vduse_virtqueue {
> u16 index;
> u16 num_max;
> @@ -57,6 +61,8 @@ struct vduse_virtqueue {
> struct vdpa_callback cb;
> struct work_struct inject;
> struct work_struct kick;
> + int irq_effective_cpu;
> + struct cpumask irq_affinity;
> };
>
> struct vduse_dev;
> @@ -128,6 +134,7 @@ static struct class *vduse_class;
> static struct cdev vduse_ctrl_cdev;
> static struct cdev vduse_cdev;
> static struct workqueue_struct *vduse_irq_wq;
> +static struct workqueue_struct *vduse_irq_bound_wq;
>
> static u32 allowed_device_id[] = {
> VIRTIO_ID_BLOCK,
> @@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
> return dev->generation;
> }
>
> +static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
> +{
> + affd->nr_sets = 1;
> + affd->set_size[0] = affvecs;
> +}
> +
> +struct cpumask *
> +create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
> +{
> + unsigned int affvecs = 0, curvec, usedvecs, i;
> + struct cpumask *masks = NULL;
> +
> + if (nvecs > affd->pre_vectors + affd->post_vectors)
> + affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
> +
> + if (!affd->calc_sets)
> + affd->calc_sets = default_calc_sets;
> +
> + affd->calc_sets(affd, affvecs);
> +
> + if (!affvecs)
> + return NULL;
> +
> + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
> + if (!masks)
> + return NULL;
> +
> + /* Fill out vectors at the beginning that don't need affinity */
> + for (curvec = 0; curvec < affd->pre_vectors; curvec++)
> + cpumask_setall(&masks[curvec]);
> +
> + for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
> + unsigned int this_vecs = affd->set_size[i];
> + int j;
> + struct cpumask *result = group_cpus_evenly(this_vecs);
> +
> + if (!result) {
> + kfree(masks);
> + return NULL;
> + }
> +
> + for (j = 0; j < this_vecs; j++)
> + cpumask_copy(&masks[curvec + j], &result[j]);
> + kfree(result);
> +
> + curvec += this_vecs;
> + usedvecs += this_vecs;
> + }
> +
> + /* Fill out vectors at the end that don't need affinity */
> + if (usedvecs >= affvecs)
> + curvec = affd->pre_vectors + affvecs;
> + else
> + curvec = affd->pre_vectors + usedvecs;
> + for (; curvec < nvecs; curvec++)
> + cpumask_setall(&masks[curvec]);
> +
> + return masks;
> +}
> +
> +static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
> + struct irq_affinity *desc)
> +{
> + struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> + struct cpumask *masks;
> + int i;
> +
> + masks = create_affinity_masks(dev->vq_num, desc);
> + if (!masks)
> + return;
> +
> + for (i = 0; i < dev->vq_num; i++)
> + cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
> + kfree(masks);
> +}
> +
> static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
> unsigned int asid,
> struct vhost_iotlb *iotlb)
> @@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
> .get_config = vduse_vdpa_get_config,
> .set_config = vduse_vdpa_set_config,
> .get_generation = vduse_vdpa_get_generation,
> + .set_irq_affinity = vduse_vdpa_set_irq_affinity,
> .reset = vduse_vdpa_reset,
> .set_map = vduse_vdpa_set_map,
> .free = vduse_vdpa_free,
> @@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> }
>
> static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> - struct work_struct *irq_work)
> + struct work_struct *irq_work,
> + int irq_effective_cpu)
> {
> int ret = -EINVAL;
>
> @@ -926,7 +1011,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> goto unlock;
>
> ret = 0;
> - queue_work(vduse_irq_wq, irq_work);
> + if (irq_effective_cpu == IRQ_UNBOUND)
> + queue_work(vduse_irq_wq, irq_work);
> + else
> + queue_work_on(irq_effective_cpu,
> + vduse_irq_bound_wq, irq_work);
> unlock:
> up_read(&dev->rwsem);
>
> @@ -1029,6 +1118,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
> return ret;
> }
>
> +static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
> +{
> + int curr_cpu = vq->irq_effective_cpu;
> +
> + while (true) {
> + curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
> + if (cpu_online(curr_cpu))
> + break;
> +
> + if (curr_cpu >= nr_cpu_ids)
> + curr_cpu = -1;
IRQ_UNBOUND?
> + }
> +
> + vq->irq_effective_cpu = curr_cpu;
> +}
> +
> static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> unsigned long arg)
> {
> @@ -1111,7 +1216,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> break;
> }
> case VDUSE_DEV_INJECT_CONFIG_IRQ:
> - ret = vduse_dev_queue_irq_work(dev, &dev->inject);
> + ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
> break;
> case VDUSE_VQ_SETUP: {
> struct vduse_vq_config config;
> @@ -1198,7 +1303,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> break;
>
> index = array_index_nospec(index, dev->vq_num);
> - ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
> +
> + vduse_vq_update_effective_cpu(dev->vqs[index]);
> + ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
> + dev->vqs[index]->irq_effective_cpu);
> break;
> }
> case VDUSE_IOTLB_REG_UMEM: {
> @@ -1367,10 +1475,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
> goto err;
>
> dev->vqs[i]->index = i;
> + dev->vqs[i]->irq_effective_cpu = -1;
IRQ_UNBOUND?
Other looks good.
Thanks
> INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
> INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
> spin_lock_init(&dev->vqs[i]->kick_lock);
> spin_lock_init(&dev->vqs[i]->irq_lock);
> + cpumask_setall(&dev->vqs[i]->irq_affinity);
> }
>
> return 0;
> @@ -1858,12 +1968,15 @@ static int vduse_init(void)
> if (ret)
> goto err_cdev;
>
> + ret = -ENOMEM;
> vduse_irq_wq = alloc_workqueue("vduse-irq",
> WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
> - if (!vduse_irq_wq) {
> - ret = -ENOMEM;
> + if (!vduse_irq_wq)
> goto err_wq;
> - }
> +
> + vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
> + if (!vduse_irq_bound_wq)
> + goto err_bound_wq;
>
> ret = vduse_domain_init();
> if (ret)
> @@ -1877,6 +1990,8 @@ static int vduse_init(void)
> err_mgmtdev:
> vduse_domain_exit();
> err_domain:
> + destroy_workqueue(vduse_irq_bound_wq);
> +err_bound_wq:
> destroy_workqueue(vduse_irq_wq);
> err_wq:
> cdev_del(&vduse_cdev);
> @@ -1896,6 +2011,7 @@ static void vduse_exit(void)
> {
> vduse_mgmtdev_exit();
> vduse_domain_exit();
> + destroy_workqueue(vduse_irq_bound_wq);
> destroy_workqueue(vduse_irq_wq);
> cdev_del(&vduse_cdev);
> device_destroy(vduse_class, vduse_major);
On Thu, Mar 16, 2023 at 5:03 PM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/2/28 17:41, Xie Yongji 写道:
> > This brings current interrupt affinity spreading mechanism
> > to vduse device. We will make use of group_cpus_evenly()
> > to create an irq callback affinity mask for each virtqueue of
> > vduse device. Then we will spread IRQs between CPUs in the affinity
> > mask, in a round-robin manner, to run the irq callback.
> >
> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > ---
> > drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++--
> > 1 file changed, 123 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > index 98359d87a06f..bde28a8692d5 100644
> > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > @@ -23,6 +23,8 @@
> > #include <linux/nospec.h>
> > #include <linux/vmalloc.h>
> > #include <linux/sched/mm.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/group_cpus.h>
> > #include <uapi/linux/vduse.h>
> > #include <uapi/linux/vdpa.h>
> > #include <uapi/linux/virtio_config.h>
> > @@ -41,6 +43,8 @@
> > #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
> > #define VDUSE_MSG_DEFAULT_TIMEOUT 30
> >
> > +#define IRQ_UNBOUND -1
> > +
> > struct vduse_virtqueue {
> > u16 index;
> > u16 num_max;
> > @@ -57,6 +61,8 @@ struct vduse_virtqueue {
> > struct vdpa_callback cb;
> > struct work_struct inject;
> > struct work_struct kick;
> > + int irq_effective_cpu;
> > + struct cpumask irq_affinity;
> > };
> >
> > struct vduse_dev;
> > @@ -128,6 +134,7 @@ static struct class *vduse_class;
> > static struct cdev vduse_ctrl_cdev;
> > static struct cdev vduse_cdev;
> > static struct workqueue_struct *vduse_irq_wq;
> > +static struct workqueue_struct *vduse_irq_bound_wq;
> >
> > static u32 allowed_device_id[] = {
> > VIRTIO_ID_BLOCK,
> > @@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
> > return dev->generation;
> > }
> >
> > +static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
> > +{
> > + affd->nr_sets = 1;
> > + affd->set_size[0] = affvecs;
> > +}
> > +
> > +struct cpumask *
> > +create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
> > +{
> > + unsigned int affvecs = 0, curvec, usedvecs, i;
> > + struct cpumask *masks = NULL;
> > +
> > + if (nvecs > affd->pre_vectors + affd->post_vectors)
> > + affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
> > +
> > + if (!affd->calc_sets)
> > + affd->calc_sets = default_calc_sets;
> > +
> > + affd->calc_sets(affd, affvecs);
> > +
> > + if (!affvecs)
> > + return NULL;
> > +
> > + masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
> > + if (!masks)
> > + return NULL;
> > +
> > + /* Fill out vectors at the beginning that don't need affinity */
> > + for (curvec = 0; curvec < affd->pre_vectors; curvec++)
> > + cpumask_setall(&masks[curvec]);
> > +
> > + for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
> > + unsigned int this_vecs = affd->set_size[i];
> > + int j;
> > + struct cpumask *result = group_cpus_evenly(this_vecs);
> > +
> > + if (!result) {
> > + kfree(masks);
> > + return NULL;
> > + }
> > +
> > + for (j = 0; j < this_vecs; j++)
> > + cpumask_copy(&masks[curvec + j], &result[j]);
> > + kfree(result);
> > +
> > + curvec += this_vecs;
> > + usedvecs += this_vecs;
> > + }
> > +
> > + /* Fill out vectors at the end that don't need affinity */
> > + if (usedvecs >= affvecs)
> > + curvec = affd->pre_vectors + affvecs;
> > + else
> > + curvec = affd->pre_vectors + usedvecs;
> > + for (; curvec < nvecs; curvec++)
> > + cpumask_setall(&masks[curvec]);
> > +
> > + return masks;
> > +}
> > +
> > +static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
> > + struct irq_affinity *desc)
> > +{
> > + struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > + struct cpumask *masks;
> > + int i;
> > +
> > + masks = create_affinity_masks(dev->vq_num, desc);
> > + if (!masks)
> > + return;
> > +
> > + for (i = 0; i < dev->vq_num; i++)
> > + cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
> > + kfree(masks);
> > +}
> > +
> > static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
> > unsigned int asid,
> > struct vhost_iotlb *iotlb)
> > @@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
> > .get_config = vduse_vdpa_get_config,
> > .set_config = vduse_vdpa_set_config,
> > .get_generation = vduse_vdpa_get_generation,
> > + .set_irq_affinity = vduse_vdpa_set_irq_affinity,
> > .reset = vduse_vdpa_reset,
> > .set_map = vduse_vdpa_set_map,
> > .free = vduse_vdpa_free,
> > @@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> > }
> >
> > static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > - struct work_struct *irq_work)
> > + struct work_struct *irq_work,
> > + int irq_effective_cpu)
> > {
> > int ret = -EINVAL;
> >
> > @@ -926,7 +1011,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > goto unlock;
> >
> > ret = 0;
> > - queue_work(vduse_irq_wq, irq_work);
> > + if (irq_effective_cpu == IRQ_UNBOUND)
> > + queue_work(vduse_irq_wq, irq_work);
> > + else
> > + queue_work_on(irq_effective_cpu,
> > + vduse_irq_bound_wq, irq_work);
> > unlock:
> > up_read(&dev->rwsem);
> >
> > @@ -1029,6 +1118,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
> > return ret;
> > }
> >
> > +static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
> > +{
> > + int curr_cpu = vq->irq_effective_cpu;
> > +
> > + while (true) {
> > + curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
> > + if (cpu_online(curr_cpu))
> > + break;
> > +
> > + if (curr_cpu >= nr_cpu_ids)
> > + curr_cpu = -1;
>
>
> IRQ_UNBOUND?
>
Will fix it.
>
> > + }
> > +
> > + vq->irq_effective_cpu = curr_cpu;
> > +}
> > +
> > static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> > unsigned long arg)
> > {
> > @@ -1111,7 +1216,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> > break;
> > }
> > case VDUSE_DEV_INJECT_CONFIG_IRQ:
> > - ret = vduse_dev_queue_irq_work(dev, &dev->inject);
> > + ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
> > break;
> > case VDUSE_VQ_SETUP: {
> > struct vduse_vq_config config;
> > @@ -1198,7 +1303,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> > break;
> >
> > index = array_index_nospec(index, dev->vq_num);
> > - ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
> > +
> > + vduse_vq_update_effective_cpu(dev->vqs[index]);
> > + ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
> > + dev->vqs[index]->irq_effective_cpu);
> > break;
> > }
> > case VDUSE_IOTLB_REG_UMEM: {
> > @@ -1367,10 +1475,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
> > goto err;
> >
> > dev->vqs[i]->index = i;
> > + dev->vqs[i]->irq_effective_cpu = -1;
>
>
> IRQ_UNBOUND?
>
Will fix it.
Thanks,
Yongji
@@ -23,6 +23,8 @@
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
+#include <linux/interrupt.h>
+#include <linux/group_cpus.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
@@ -41,6 +43,8 @@
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30
+#define IRQ_UNBOUND -1
+
struct vduse_virtqueue {
u16 index;
u16 num_max;
@@ -57,6 +61,8 @@ struct vduse_virtqueue {
struct vdpa_callback cb;
struct work_struct inject;
struct work_struct kick;
+ int irq_effective_cpu;
+ struct cpumask irq_affinity;
};
struct vduse_dev;
@@ -128,6 +134,7 @@ static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
static u32 allowed_device_id[] = {
VIRTIO_ID_BLOCK,
@@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
return dev->generation;
}
+/*
+ * Fallback calc_sets callback: describe the spreadable vectors as one
+ * single set that holds all @affvecs of them.
+ */
+static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
+{
+	affd->set_size[0] = affvecs;
+	affd->nr_sets = 1;
+}
+
+/*
+ * Build one cpumask per vector for @nvecs vectors.
+ *
+ * The first affd->pre_vectors entries and the trailing entries that are
+ * not spread are set to all CPUs. The entries in between are filled, set
+ * by set, from the masks returned by group_cpus_evenly().
+ *
+ * If @affd has no calc_sets callback, default_calc_sets is installed in
+ * @affd before it is invoked.
+ *
+ * Returns a kcalloc()ed array of @nvecs masks which the caller must
+ * kfree(), or NULL when there is no vector to spread, when kcalloc()
+ * fails, or when group_cpus_evenly() returns NULL.
+ *
+ * File-local helper: kept static so it needs no prototype.
+ */
+static struct cpumask *
+create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+{
+	unsigned int affvecs = 0, curvec, usedvecs, i;
+	struct cpumask *masks = NULL;
+
+	if (nvecs > affd->pre_vectors + affd->post_vectors)
+		affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
+
+	if (!affd->calc_sets)
+		affd->calc_sets = default_calc_sets;
+
+	affd->calc_sets(affd, affvecs);
+
+	if (!affvecs)
+		return NULL;
+
+	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		return NULL;
+
+	/* Fill out vectors at the beginning that don't need affinity */
+	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
+		cpumask_setall(&masks[curvec]);
+
+	for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+		unsigned int this_vecs = affd->set_size[i];
+		unsigned int j;	/* same signedness as this_vecs */
+		struct cpumask *result = group_cpus_evenly(this_vecs);
+
+		if (!result) {
+			kfree(masks);
+			return NULL;
+		}
+
+		/* Copy this set's masks in, then drop the temporary array */
+		for (j = 0; j < this_vecs; j++)
+			cpumask_copy(&masks[curvec + j], &result[j]);
+		kfree(result);
+
+		curvec += this_vecs;
+		usedvecs += this_vecs;
+	}
+
+	/* Fill out vectors at the end that don't need affinity */
+	if (usedvecs >= affvecs)
+		curvec = affd->pre_vectors + affvecs;
+	else
+		curvec = affd->pre_vectors + usedvecs;
+	for (; curvec < nvecs; curvec++)
+		cpumask_setall(&masks[curvec]);
+
+	return masks;
+}
+
+/*
+ * Compute a cpumask per virtqueue from @desc and store it in each
+ * virtqueue's irq_affinity. When create_affinity_masks() returns NULL
+ * the virtqueues' current masks are left as they are.
+ */
+static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
+					struct irq_affinity *desc)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct cpumask *vq_masks = create_affinity_masks(dev->vq_num, desc);
+	int idx;
+
+	if (!vq_masks)
+		return;
+
+	for (idx = 0; idx < dev->vq_num; idx++)
+		cpumask_copy(&dev->vqs[idx]->irq_affinity, &vq_masks[idx]);
+
+	/* The per-vq copies are done; release the temporary array */
+	kfree(vq_masks);
+}
+
static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
unsigned int asid,
struct vhost_iotlb *iotlb)
@@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
.get_config = vduse_vdpa_get_config,
.set_config = vduse_vdpa_set_config,
.get_generation = vduse_vdpa_get_generation,
+ .set_irq_affinity = vduse_vdpa_set_irq_affinity,
.reset = vduse_vdpa_reset,
.set_map = vduse_vdpa_set_map,
.free = vduse_vdpa_free,
@@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
}
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
- struct work_struct *irq_work)
+ struct work_struct *irq_work,
+ int irq_effective_cpu)
{
int ret = -EINVAL;
@@ -926,7 +1011,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
goto unlock;
ret = 0;
- queue_work(vduse_irq_wq, irq_work);
+ if (irq_effective_cpu == IRQ_UNBOUND)
+ queue_work(vduse_irq_wq, irq_work);
+ else
+ queue_work_on(irq_effective_cpu,
+ vduse_irq_bound_wq, irq_work);
unlock:
up_read(&dev->rwsem);
@@ -1029,6 +1118,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
return ret;
}
+/*
+ * Advance @vq->irq_effective_cpu to the next online CPU in
+ * @vq->irq_affinity, wrapping around to the start of the mask
+ * (round-robin).
+ *
+ * If the mask contains no online CPU, irq_effective_cpu is set to
+ * IRQ_UNBOUND instead of searching forever.
+ */
+static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
+{
+	unsigned int next;
+
+	/* Next online CPU in the mask strictly after the current one */
+	next = cpumask_next_and(vq->irq_effective_cpu, &vq->irq_affinity,
+				cpu_online_mask);
+
+	/* Ran off the end: wrap around and search from the first CPU */
+	if (next >= nr_cpu_ids)
+		next = cpumask_first_and(&vq->irq_affinity, cpu_online_mask);
+
+	if (next >= nr_cpu_ids)
+		vq->irq_effective_cpu = IRQ_UNBOUND;
+	else
+		vq->irq_effective_cpu = next;
+}
+
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
@@ -1111,7 +1216,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
}
case VDUSE_DEV_INJECT_CONFIG_IRQ:
- ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+ ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
break;
case VDUSE_VQ_SETUP: {
struct vduse_vq_config config;
@@ -1198,7 +1303,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
break;
index = array_index_nospec(index, dev->vq_num);
- ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
+
+ vduse_vq_update_effective_cpu(dev->vqs[index]);
+ ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
+ dev->vqs[index]->irq_effective_cpu);
break;
}
case VDUSE_IOTLB_REG_UMEM: {
@@ -1367,10 +1475,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
goto err;
dev->vqs[i]->index = i;
+ dev->vqs[i]->irq_effective_cpu = -1;
INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
spin_lock_init(&dev->vqs[i]->kick_lock);
spin_lock_init(&dev->vqs[i]->irq_lock);
+ cpumask_setall(&dev->vqs[i]->irq_affinity);
}
return 0;
@@ -1858,12 +1968,15 @@ static int vduse_init(void)
if (ret)
goto err_cdev;
+ ret = -ENOMEM;
vduse_irq_wq = alloc_workqueue("vduse-irq",
WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
- if (!vduse_irq_wq) {
- ret = -ENOMEM;
+ if (!vduse_irq_wq)
goto err_wq;
- }
+
+ vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+ if (!vduse_irq_bound_wq)
+ goto err_bound_wq;
ret = vduse_domain_init();
if (ret)
@@ -1877,6 +1990,8 @@ static int vduse_init(void)
err_mgmtdev:
vduse_domain_exit();
err_domain:
+ destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
destroy_workqueue(vduse_irq_wq);
err_wq:
cdev_del(&vduse_cdev);
@@ -1896,6 +2011,7 @@ static void vduse_exit(void)
{
vduse_mgmtdev_exit();
vduse_domain_exit();
+ destroy_workqueue(vduse_irq_bound_wq);
destroy_workqueue(vduse_irq_wq);
cdev_del(&vduse_cdev);
device_destroy(vduse_class, vduse_major);