[v3,05/11] vduse: Support automatic irq callback affinity

Message ID 20230228094110.37-6-xieyongji@bytedance.com
State New
Headers
Series VDUSE: Improve performance |

Commit Message

Yongji Xie Feb. 28, 2023, 9:41 a.m. UTC
  This brings current interrupt affinity spreading mechanism
to vduse device. We will make use of group_cpus_evenly()
to create an irq callback affinity mask for each virtqueue of
vduse device. Then we will spread IRQs between CPUs in the affinity
mask, in a round-robin manner, to run the irq callback.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
---
 drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 7 deletions(-)
  

Comments

kernel test robot Feb. 28, 2023, 11:12 a.m. UTC | #1
Hi Xie,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on tip/irq/core]
[also build test WARNING on linus/master next-20230228]
[cannot apply to mst-vhost/linux-next v6.2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Xie-Yongji/lib-group_cpus-Export-group_cpus_evenly/20230228-174438
patch link:    https://lore.kernel.org/r/20230228094110.37-6-xieyongji%40bytedance.com
patch subject: [PATCH v3 05/11] vduse: Support automatic irq callback affinity
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230228/202302281954.jRA7Qzq4-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/6c15cc28cb814c0e6cb80955bc59517e80c15ae2
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Xie-Yongji/lib-group_cpus-Export-group_cpus_evenly/20230228-174438
        git checkout 6c15cc28cb814c0e6cb80955bc59517e80c15ae2
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=m68k SHELL=/bin/bash drivers/vdpa/vdpa_user/

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302281954.jRA7Qzq4-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/vdpa/vdpa_user/vduse_dev.c:725:1: warning: no previous prototype for 'create_affinity_masks' [-Wmissing-prototypes]
     725 | create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
         | ^~~~~~~~~~~~~~~~~~~~~


vim +/create_affinity_masks +725 drivers/vdpa/vdpa_user/vduse_dev.c

   723	
   724	struct cpumask *
 > 725	create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
   726	{
   727		unsigned int affvecs = 0, curvec, usedvecs, i;
   728		struct cpumask *masks = NULL;
   729	
   730		if (nvecs > affd->pre_vectors + affd->post_vectors)
   731			affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
   732	
   733		if (!affd->calc_sets)
   734			affd->calc_sets = default_calc_sets;
   735	
   736		affd->calc_sets(affd, affvecs);
   737	
   738		if (!affvecs)
   739			return NULL;
   740	
   741		masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
   742		if (!masks)
   743			return NULL;
   744	
   745		/* Fill out vectors at the beginning that don't need affinity */
   746		for (curvec = 0; curvec < affd->pre_vectors; curvec++)
   747			cpumask_setall(&masks[curvec]);
   748	
   749		for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
   750			unsigned int this_vecs = affd->set_size[i];
   751			int j;
   752			struct cpumask *result = group_cpus_evenly(this_vecs);
   753	
   754			if (!result) {
   755				kfree(masks);
   756				return NULL;
   757			}
   758	
   759			for (j = 0; j < this_vecs; j++)
   760				cpumask_copy(&masks[curvec + j], &result[j]);
   761			kfree(result);
   762	
   763			curvec += this_vecs;
   764			usedvecs += this_vecs;
   765		}
   766	
   767		/* Fill out vectors at the end that don't need affinity */
   768		if (usedvecs >= affvecs)
   769			curvec = affd->pre_vectors + affvecs;
   770		else
   771			curvec = affd->pre_vectors + usedvecs;
   772		for (; curvec < nvecs; curvec++)
   773			cpumask_setall(&masks[curvec]);
   774	
   775		return masks;
   776	}
   777
  
kernel test robot March 1, 2023, 1:18 a.m. UTC | #2
Hi Xie,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on tip/irq/core]
[also build test WARNING on linus/master next-20230228]
[cannot apply to mst-vhost/linux-next hch-configfs/for-next v6.2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Xie-Yongji/lib-group_cpus-Export-group_cpus_evenly/20230228-174438
patch link:    https://lore.kernel.org/r/20230228094110.37-6-xieyongji%40bytedance.com
patch subject: [PATCH v3 05/11] vduse: Support automatic irq callback affinity
config: x86_64-randconfig-s021 (https://download.01.org/0day-ci/archive/20230301/202303010802.fyGx4T0d-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.4-39-gce1a6720-dirty
        # https://github.com/intel-lab-lkp/linux/commit/6c15cc28cb814c0e6cb80955bc59517e80c15ae2
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Xie-Yongji/lib-group_cpus-Export-group_cpus_evenly/20230228-174438
        git checkout 6c15cc28cb814c0e6cb80955bc59517e80c15ae2
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 olddefconfig
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=x86_64 SHELL=/bin/bash drivers/vdpa/vdpa_user/

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202303010802.fyGx4T0d-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> drivers/vdpa/vdpa_user/vduse_dev.c:724:16: sparse: sparse: symbol 'create_affinity_masks' was not declared. Should it be static?
  
Jason Wang March 16, 2023, 9:03 a.m. UTC | #3
在 2023/2/28 17:41, Xie Yongji 写道:
> This brings current interrupt affinity spreading mechanism
> to vduse device. We will make use of group_cpus_evenly()
> to create an irq callback affinity mask for each virtqueue of
> vduse device. Then we will spread IRQs between CPUs in the affinity
> mask, in a round-robin manner, to run the irq callback.
>
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> ---
>   drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++--
>   1 file changed, 123 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index 98359d87a06f..bde28a8692d5 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -23,6 +23,8 @@
>   #include <linux/nospec.h>
>   #include <linux/vmalloc.h>
>   #include <linux/sched/mm.h>
> +#include <linux/interrupt.h>
> +#include <linux/group_cpus.h>
>   #include <uapi/linux/vduse.h>
>   #include <uapi/linux/vdpa.h>
>   #include <uapi/linux/virtio_config.h>
> @@ -41,6 +43,8 @@
>   #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
>   #define VDUSE_MSG_DEFAULT_TIMEOUT 30
>   
> +#define IRQ_UNBOUND -1
> +
>   struct vduse_virtqueue {
>   	u16 index;
>   	u16 num_max;
> @@ -57,6 +61,8 @@ struct vduse_virtqueue {
>   	struct vdpa_callback cb;
>   	struct work_struct inject;
>   	struct work_struct kick;
> +	int irq_effective_cpu;
> +	struct cpumask irq_affinity;
>   };
>   
>   struct vduse_dev;
> @@ -128,6 +134,7 @@ static struct class *vduse_class;
>   static struct cdev vduse_ctrl_cdev;
>   static struct cdev vduse_cdev;
>   static struct workqueue_struct *vduse_irq_wq;
> +static struct workqueue_struct *vduse_irq_bound_wq;
>   
>   static u32 allowed_device_id[] = {
>   	VIRTIO_ID_BLOCK,
> @@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
>   	return dev->generation;
>   }
>   
> +static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
> +{
> +	affd->nr_sets = 1;
> +	affd->set_size[0] = affvecs;
> +}
> +
> +struct cpumask *
> +create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
> +{
> +	unsigned int affvecs = 0, curvec, usedvecs, i;
> +	struct cpumask *masks = NULL;
> +
> +	if (nvecs > affd->pre_vectors + affd->post_vectors)
> +		affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
> +
> +	if (!affd->calc_sets)
> +		affd->calc_sets = default_calc_sets;
> +
> +	affd->calc_sets(affd, affvecs);
> +
> +	if (!affvecs)
> +		return NULL;
> +
> +	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
> +	if (!masks)
> +		return NULL;
> +
> +	/* Fill out vectors at the beginning that don't need affinity */
> +	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
> +		cpumask_setall(&masks[curvec]);
> +
> +	for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
> +		unsigned int this_vecs = affd->set_size[i];
> +		int j;
> +		struct cpumask *result = group_cpus_evenly(this_vecs);
> +
> +		if (!result) {
> +			kfree(masks);
> +			return NULL;
> +		}
> +
> +		for (j = 0; j < this_vecs; j++)
> +			cpumask_copy(&masks[curvec + j], &result[j]);
> +		kfree(result);
> +
> +		curvec += this_vecs;
> +		usedvecs += this_vecs;
> +	}
> +
> +	/* Fill out vectors at the end that don't need affinity */
> +	if (usedvecs >= affvecs)
> +		curvec = affd->pre_vectors + affvecs;
> +	else
> +		curvec = affd->pre_vectors + usedvecs;
> +	for (; curvec < nvecs; curvec++)
> +		cpumask_setall(&masks[curvec]);
> +
> +	return masks;
> +}
> +
> +static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
> +					struct irq_affinity *desc)
> +{
> +	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> +	struct cpumask *masks;
> +	int i;
> +
> +	masks = create_affinity_masks(dev->vq_num, desc);
> +	if (!masks)
> +		return;
> +
> +	for (i = 0; i < dev->vq_num; i++)
> +		cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
> +	kfree(masks);
> +}
> +
>   static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
>   				unsigned int asid,
>   				struct vhost_iotlb *iotlb)
> @@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
>   	.get_config		= vduse_vdpa_get_config,
>   	.set_config		= vduse_vdpa_set_config,
>   	.get_generation		= vduse_vdpa_get_generation,
> +	.set_irq_affinity	= vduse_vdpa_set_irq_affinity,
>   	.reset			= vduse_vdpa_reset,
>   	.set_map		= vduse_vdpa_set_map,
>   	.free			= vduse_vdpa_free,
> @@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
>   }
>   
>   static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> -				    struct work_struct *irq_work)
> +				    struct work_struct *irq_work,
> +				    int irq_effective_cpu)
>   {
>   	int ret = -EINVAL;
>   
> @@ -926,7 +1011,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
>   		goto unlock;
>   
>   	ret = 0;
> -	queue_work(vduse_irq_wq, irq_work);
> +	if (irq_effective_cpu == IRQ_UNBOUND)
> +		queue_work(vduse_irq_wq, irq_work);
> +	else
> +		queue_work_on(irq_effective_cpu,
> +			      vduse_irq_bound_wq, irq_work);
>   unlock:
>   	up_read(&dev->rwsem);
>   
> @@ -1029,6 +1118,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
>   	return ret;
>   }
>   
> +static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
> +{
> +	int curr_cpu = vq->irq_effective_cpu;
> +
> +	while (true) {
> +		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
> +		if (cpu_online(curr_cpu))
> +			break;
> +
> +		if (curr_cpu >= nr_cpu_ids)
> +			curr_cpu = -1;


IRQ_UNBOUND?


> +	}
> +
> +	vq->irq_effective_cpu = curr_cpu;
> +}
> +
>   static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>   			    unsigned long arg)
>   {
> @@ -1111,7 +1216,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>   		break;
>   	}
>   	case VDUSE_DEV_INJECT_CONFIG_IRQ:
> -		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
> +		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
>   		break;
>   	case VDUSE_VQ_SETUP: {
>   		struct vduse_vq_config config;
> @@ -1198,7 +1303,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>   			break;
>   
>   		index = array_index_nospec(index, dev->vq_num);
> -		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
> +
> +		vduse_vq_update_effective_cpu(dev->vqs[index]);
> +		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
> +					dev->vqs[index]->irq_effective_cpu);
>   		break;
>   	}
>   	case VDUSE_IOTLB_REG_UMEM: {
> @@ -1367,10 +1475,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
>   			goto err;
>   
>   		dev->vqs[i]->index = i;
> +		dev->vqs[i]->irq_effective_cpu = -1;


IRQ_UNBOUND?

Other looks good.

Thanks


>   		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
>   		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
>   		spin_lock_init(&dev->vqs[i]->kick_lock);
>   		spin_lock_init(&dev->vqs[i]->irq_lock);
> +		cpumask_setall(&dev->vqs[i]->irq_affinity);
>   	}
>   
>   	return 0;
> @@ -1858,12 +1968,15 @@ static int vduse_init(void)
>   	if (ret)
>   		goto err_cdev;
>   
> +	ret = -ENOMEM;
>   	vduse_irq_wq = alloc_workqueue("vduse-irq",
>   				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
> -	if (!vduse_irq_wq) {
> -		ret = -ENOMEM;
> +	if (!vduse_irq_wq)
>   		goto err_wq;
> -	}
> +
> +	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
> +	if (!vduse_irq_bound_wq)
> +		goto err_bound_wq;
>   
>   	ret = vduse_domain_init();
>   	if (ret)
> @@ -1877,6 +1990,8 @@ static int vduse_init(void)
>   err_mgmtdev:
>   	vduse_domain_exit();
>   err_domain:
> +	destroy_workqueue(vduse_irq_bound_wq);
> +err_bound_wq:
>   	destroy_workqueue(vduse_irq_wq);
>   err_wq:
>   	cdev_del(&vduse_cdev);
> @@ -1896,6 +2011,7 @@ static void vduse_exit(void)
>   {
>   	vduse_mgmtdev_exit();
>   	vduse_domain_exit();
> +	destroy_workqueue(vduse_irq_bound_wq);
>   	destroy_workqueue(vduse_irq_wq);
>   	cdev_del(&vduse_cdev);
>   	device_destroy(vduse_class, vduse_major);
  
Yongji Xie March 17, 2023, 7:04 a.m. UTC | #4
On Thu, Mar 16, 2023 at 5:03 PM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/2/28 17:41, Xie Yongji 写道:
> > This brings current interrupt affinity spreading mechanism
> > to vduse device. We will make use of group_cpus_evenly()
> > to create an irq callback affinity mask for each virtqueue of
> > vduse device. Then we will spread IRQs between CPUs in the affinity
> > mask, in a round-robin manner, to run the irq callback.
> >
> > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > ---
> >   drivers/vdpa/vdpa_user/vduse_dev.c | 130 +++++++++++++++++++++++++++--
> >   1 file changed, 123 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > index 98359d87a06f..bde28a8692d5 100644
> > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > @@ -23,6 +23,8 @@
> >   #include <linux/nospec.h>
> >   #include <linux/vmalloc.h>
> >   #include <linux/sched/mm.h>
> > +#include <linux/interrupt.h>
> > +#include <linux/group_cpus.h>
> >   #include <uapi/linux/vduse.h>
> >   #include <uapi/linux/vdpa.h>
> >   #include <uapi/linux/virtio_config.h>
> > @@ -41,6 +43,8 @@
> >   #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
> >   #define VDUSE_MSG_DEFAULT_TIMEOUT 30
> >
> > +#define IRQ_UNBOUND -1
> > +
> >   struct vduse_virtqueue {
> >       u16 index;
> >       u16 num_max;
> > @@ -57,6 +61,8 @@ struct vduse_virtqueue {
> >       struct vdpa_callback cb;
> >       struct work_struct inject;
> >       struct work_struct kick;
> > +     int irq_effective_cpu;
> > +     struct cpumask irq_affinity;
> >   };
> >
> >   struct vduse_dev;
> > @@ -128,6 +134,7 @@ static struct class *vduse_class;
> >   static struct cdev vduse_ctrl_cdev;
> >   static struct cdev vduse_cdev;
> >   static struct workqueue_struct *vduse_irq_wq;
> > +static struct workqueue_struct *vduse_irq_bound_wq;
> >
> >   static u32 allowed_device_id[] = {
> >       VIRTIO_ID_BLOCK,
> > @@ -708,6 +715,82 @@ static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
> >       return dev->generation;
> >   }
> >
> > +static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
> > +{
> > +     affd->nr_sets = 1;
> > +     affd->set_size[0] = affvecs;
> > +}
> > +
> > +struct cpumask *
> > +create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
> > +{
> > +     unsigned int affvecs = 0, curvec, usedvecs, i;
> > +     struct cpumask *masks = NULL;
> > +
> > +     if (nvecs > affd->pre_vectors + affd->post_vectors)
> > +             affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
> > +
> > +     if (!affd->calc_sets)
> > +             affd->calc_sets = default_calc_sets;
> > +
> > +     affd->calc_sets(affd, affvecs);
> > +
> > +     if (!affvecs)
> > +             return NULL;
> > +
> > +     masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
> > +     if (!masks)
> > +             return NULL;
> > +
> > +     /* Fill out vectors at the beginning that don't need affinity */
> > +     for (curvec = 0; curvec < affd->pre_vectors; curvec++)
> > +             cpumask_setall(&masks[curvec]);
> > +
> > +     for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
> > +             unsigned int this_vecs = affd->set_size[i];
> > +             int j;
> > +             struct cpumask *result = group_cpus_evenly(this_vecs);
> > +
> > +             if (!result) {
> > +                     kfree(masks);
> > +                     return NULL;
> > +             }
> > +
> > +             for (j = 0; j < this_vecs; j++)
> > +                     cpumask_copy(&masks[curvec + j], &result[j]);
> > +             kfree(result);
> > +
> > +             curvec += this_vecs;
> > +             usedvecs += this_vecs;
> > +     }
> > +
> > +     /* Fill out vectors at the end that don't need affinity */
> > +     if (usedvecs >= affvecs)
> > +             curvec = affd->pre_vectors + affvecs;
> > +     else
> > +             curvec = affd->pre_vectors + usedvecs;
> > +     for (; curvec < nvecs; curvec++)
> > +             cpumask_setall(&masks[curvec]);
> > +
> > +     return masks;
> > +}
> > +
> > +static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
> > +                                     struct irq_affinity *desc)
> > +{
> > +     struct vduse_dev *dev = vdpa_to_vduse(vdpa);
> > +     struct cpumask *masks;
> > +     int i;
> > +
> > +     masks = create_affinity_masks(dev->vq_num, desc);
> > +     if (!masks)
> > +             return;
> > +
> > +     for (i = 0; i < dev->vq_num; i++)
> > +             cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
> > +     kfree(masks);
> > +}
> > +
> >   static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
> >                               unsigned int asid,
> >                               struct vhost_iotlb *iotlb)
> > @@ -758,6 +841,7 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
> >       .get_config             = vduse_vdpa_get_config,
> >       .set_config             = vduse_vdpa_set_config,
> >       .get_generation         = vduse_vdpa_get_generation,
> > +     .set_irq_affinity       = vduse_vdpa_set_irq_affinity,
> >       .reset                  = vduse_vdpa_reset,
> >       .set_map                = vduse_vdpa_set_map,
> >       .free                   = vduse_vdpa_free,
> > @@ -917,7 +1001,8 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> >   }
> >
> >   static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > -                                 struct work_struct *irq_work)
> > +                                 struct work_struct *irq_work,
> > +                                 int irq_effective_cpu)
> >   {
> >       int ret = -EINVAL;
> >
> > @@ -926,7 +1011,11 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> >               goto unlock;
> >
> >       ret = 0;
> > -     queue_work(vduse_irq_wq, irq_work);
> > +     if (irq_effective_cpu == IRQ_UNBOUND)
> > +             queue_work(vduse_irq_wq, irq_work);
> > +     else
> > +             queue_work_on(irq_effective_cpu,
> > +                           vduse_irq_bound_wq, irq_work);
> >   unlock:
> >       up_read(&dev->rwsem);
> >
> > @@ -1029,6 +1118,22 @@ static int vduse_dev_reg_umem(struct vduse_dev *dev,
> >       return ret;
> >   }
> >
> > +static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
> > +{
> > +     int curr_cpu = vq->irq_effective_cpu;
> > +
> > +     while (true) {
> > +             curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
> > +             if (cpu_online(curr_cpu))
> > +                     break;
> > +
> > +             if (curr_cpu >= nr_cpu_ids)
> > +                     curr_cpu = -1;
>
>
> IRQ_UNBOUND?
>

Will fix it.

>
> > +     }
> > +
> > +     vq->irq_effective_cpu = curr_cpu;
> > +}
> > +
> >   static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> >                           unsigned long arg)
> >   {
> > @@ -1111,7 +1216,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> >               break;
> >       }
> >       case VDUSE_DEV_INJECT_CONFIG_IRQ:
> > -             ret = vduse_dev_queue_irq_work(dev, &dev->inject);
> > +             ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
> >               break;
> >       case VDUSE_VQ_SETUP: {
> >               struct vduse_vq_config config;
> > @@ -1198,7 +1303,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> >                       break;
> >
> >               index = array_index_nospec(index, dev->vq_num);
> > -             ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
> > +
> > +             vduse_vq_update_effective_cpu(dev->vqs[index]);
> > +             ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
> > +                                     dev->vqs[index]->irq_effective_cpu);
> >               break;
> >       }
> >       case VDUSE_IOTLB_REG_UMEM: {
> > @@ -1367,10 +1475,12 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
> >                       goto err;
> >
> >               dev->vqs[i]->index = i;
> > +             dev->vqs[i]->irq_effective_cpu = -1;
>
>
> IRQ_UNBOUND?
>

Will fix it.

Thanks,
Yongji
  

Patch

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 98359d87a06f..bde28a8692d5 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -23,6 +23,8 @@ 
 #include <linux/nospec.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/mm.h>
+#include <linux/interrupt.h>
+#include <linux/group_cpus.h>
 #include <uapi/linux/vduse.h>
 #include <uapi/linux/vdpa.h>
 #include <uapi/linux/virtio_config.h>
@@ -41,6 +43,8 @@ 
 #define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
 #define VDUSE_MSG_DEFAULT_TIMEOUT 30
 
+#define IRQ_UNBOUND -1
+
 struct vduse_virtqueue {
 	u16 index;
 	u16 num_max;
@@ -57,6 +61,8 @@  struct vduse_virtqueue {
 	struct vdpa_callback cb;
 	struct work_struct inject;
 	struct work_struct kick;
+	int irq_effective_cpu;
+	struct cpumask irq_affinity;
 };
 
 struct vduse_dev;
@@ -128,6 +134,7 @@  static struct class *vduse_class;
 static struct cdev vduse_ctrl_cdev;
 static struct cdev vduse_cdev;
 static struct workqueue_struct *vduse_irq_wq;
+static struct workqueue_struct *vduse_irq_bound_wq;
 
 static u32 allowed_device_id[] = {
 	VIRTIO_ID_BLOCK,
@@ -708,6 +715,82 @@  static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
 	return dev->generation;
 }
 
+static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
+{
+	affd->nr_sets = 1;
+	affd->set_size[0] = affvecs;
+}
+
+struct cpumask *
+create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
+{
+	unsigned int affvecs = 0, curvec, usedvecs, i;
+	struct cpumask *masks = NULL;
+
+	if (nvecs > affd->pre_vectors + affd->post_vectors)
+		affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
+
+	if (!affd->calc_sets)
+		affd->calc_sets = default_calc_sets;
+
+	affd->calc_sets(affd, affvecs);
+
+	if (!affvecs)
+		return NULL;
+
+	masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
+	if (!masks)
+		return NULL;
+
+	/* Fill out vectors at the beginning that don't need affinity */
+	for (curvec = 0; curvec < affd->pre_vectors; curvec++)
+		cpumask_setall(&masks[curvec]);
+
+	for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
+		unsigned int this_vecs = affd->set_size[i];
+		int j;
+		struct cpumask *result = group_cpus_evenly(this_vecs);
+
+		if (!result) {
+			kfree(masks);
+			return NULL;
+		}
+
+		for (j = 0; j < this_vecs; j++)
+			cpumask_copy(&masks[curvec + j], &result[j]);
+		kfree(result);
+
+		curvec += this_vecs;
+		usedvecs += this_vecs;
+	}
+
+	/* Fill out vectors at the end that don't need affinity */
+	if (usedvecs >= affvecs)
+		curvec = affd->pre_vectors + affvecs;
+	else
+		curvec = affd->pre_vectors + usedvecs;
+	for (; curvec < nvecs; curvec++)
+		cpumask_setall(&masks[curvec]);
+
+	return masks;
+}
+
+static void vduse_vdpa_set_irq_affinity(struct vdpa_device *vdpa,
+					struct irq_affinity *desc)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct cpumask *masks;
+	int i;
+
+	masks = create_affinity_masks(dev->vq_num, desc);
+	if (!masks)
+		return;
+
+	for (i = 0; i < dev->vq_num; i++)
+		cpumask_copy(&dev->vqs[i]->irq_affinity, &masks[i]);
+	kfree(masks);
+}
+
 static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
 				unsigned int asid,
 				struct vhost_iotlb *iotlb)
@@ -758,6 +841,7 @@  static const struct vdpa_config_ops vduse_vdpa_config_ops = {
 	.get_config		= vduse_vdpa_get_config,
 	.set_config		= vduse_vdpa_set_config,
 	.get_generation		= vduse_vdpa_get_generation,
+	.set_irq_affinity	= vduse_vdpa_set_irq_affinity,
 	.reset			= vduse_vdpa_reset,
 	.set_map		= vduse_vdpa_set_map,
 	.free			= vduse_vdpa_free,
@@ -917,7 +1001,8 @@  static void vduse_vq_irq_inject(struct work_struct *work)
 }
 
 static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
-				    struct work_struct *irq_work)
+				    struct work_struct *irq_work,
+				    int irq_effective_cpu)
 {
 	int ret = -EINVAL;
 
@@ -926,7 +1011,11 @@  static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
 		goto unlock;
 
 	ret = 0;
-	queue_work(vduse_irq_wq, irq_work);
+	if (irq_effective_cpu == IRQ_UNBOUND)
+		queue_work(vduse_irq_wq, irq_work);
+	else
+		queue_work_on(irq_effective_cpu,
+			      vduse_irq_bound_wq, irq_work);
 unlock:
 	up_read(&dev->rwsem);
 
@@ -1029,6 +1118,22 @@  static int vduse_dev_reg_umem(struct vduse_dev *dev,
 	return ret;
 }
 
+static void vduse_vq_update_effective_cpu(struct vduse_virtqueue *vq)
+{
+	int curr_cpu = vq->irq_effective_cpu;
+
+	while (true) {
+		curr_cpu = cpumask_next(curr_cpu, &vq->irq_affinity);
+		if (cpu_online(curr_cpu))
+			break;
+
+		if (curr_cpu >= nr_cpu_ids)
+			curr_cpu = -1;
+	}
+
+	vq->irq_effective_cpu = curr_cpu;
+}
+
 static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 			    unsigned long arg)
 {
@@ -1111,7 +1216,7 @@  static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 		break;
 	}
 	case VDUSE_DEV_INJECT_CONFIG_IRQ:
-		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
+		ret = vduse_dev_queue_irq_work(dev, &dev->inject, IRQ_UNBOUND);
 		break;
 	case VDUSE_VQ_SETUP: {
 		struct vduse_vq_config config;
@@ -1198,7 +1303,10 @@  static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 			break;
 
 		index = array_index_nospec(index, dev->vq_num);
-		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject);
+
+		vduse_vq_update_effective_cpu(dev->vqs[index]);
+		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject,
+					dev->vqs[index]->irq_effective_cpu);
 		break;
 	}
 	case VDUSE_IOTLB_REG_UMEM: {
@@ -1367,10 +1475,12 @@  static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
 			goto err;
 
 		dev->vqs[i]->index = i;
+		dev->vqs[i]->irq_effective_cpu = -1;
 		INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
 		INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
 		spin_lock_init(&dev->vqs[i]->kick_lock);
 		spin_lock_init(&dev->vqs[i]->irq_lock);
+		cpumask_setall(&dev->vqs[i]->irq_affinity);
 	}
 
 	return 0;
@@ -1858,12 +1968,15 @@  static int vduse_init(void)
 	if (ret)
 		goto err_cdev;
 
+	ret = -ENOMEM;
 	vduse_irq_wq = alloc_workqueue("vduse-irq",
 				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
-	if (!vduse_irq_wq) {
-		ret = -ENOMEM;
+	if (!vduse_irq_wq)
 		goto err_wq;
-	}
+
+	vduse_irq_bound_wq = alloc_workqueue("vduse-irq-bound", WQ_HIGHPRI, 0);
+	if (!vduse_irq_bound_wq)
+		goto err_bound_wq;
 
 	ret = vduse_domain_init();
 	if (ret)
@@ -1877,6 +1990,8 @@  static int vduse_init(void)
 err_mgmtdev:
 	vduse_domain_exit();
 err_domain:
+	destroy_workqueue(vduse_irq_bound_wq);
+err_bound_wq:
 	destroy_workqueue(vduse_irq_wq);
 err_wq:
 	cdev_del(&vduse_cdev);
@@ -1896,6 +2011,7 @@  static void vduse_exit(void)
 {
 	vduse_mgmtdev_exit();
 	vduse_domain_exit();
+	destroy_workqueue(vduse_irq_bound_wq);
 	destroy_workqueue(vduse_irq_wq);
 	cdev_del(&vduse_cdev);
 	device_destroy(vduse_class, vduse_major);