[RESEND,1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode

Message ID 20230721063513.33431-2-tanmay@marvell.com
State New
Headers
Series Add support for SMMU ECMDQ |

Commit Message

Tanmay Jagdale July 21, 2023, 6:35 a.m. UTC
  From: Zhen Lei <thunder.leizhen@huawei.com>

Ensure that each core exclusively occupies an ECMDQ and all of them are
enabled during initialization. During this initialization process, any
errors will result in a fallback to using normal CMDQ.

When GERROR is triggered by ECMDQ, all ECMDQs need to be traversed: the
ECMDQs with errors will be processed and the ECMDQs without errors will
be skipped directly.

Compared with register SMMU_CMDQ_PROD, register SMMU_ECMDQ_PROD has one
more 'EN' bit and one more 'ERRACK' bit. Therefore, an extra member
'ecmdq_prod' is added to record the values of these two bits. Each time
register SMMU_ECMDQ_PROD is updated, the value of 'ecmdq_prod' is ORed.
After the error indicated by SMMU_GERROR.CMDQP_ERR is fixed, the 'ERRACK'
bit needs to be toggled to resume the corresponding ECMDQ. Therefore, a
rwlock is used to protect the write operation to bit 'ERRACK' during error
handling and the read operation to bit 'ERRACK' during command insertion.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 210 +++++++++++++++++++-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  36 ++++
 2 files changed, 245 insertions(+), 1 deletion(-)
  

Comments

kernel test robot July 27, 2023, 11:13 a.m. UTC | #1
Hi Tanmay,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.5-rc2]
[also build test WARNING on linus/master next-20230727]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Tanmay-Jagdale/iommu-arm-smmu-v3-Add-support-for-ECMDQ-register-mode/20230721-143913
base:   v6.5-rc2
patch link:    https://lore.kernel.org/r/20230721063513.33431-2-tanmay%40marvell.com
patch subject: [RESEND PATCH 1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
config: arm64-randconfig-r083-20230726 (https://download.01.org/0day-ci/archive/20230727/202307271849.sqXiM3CK-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230727/202307271849.sqXiM3CK-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202307271849.sqXiM3CK-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct arm_smmu_ecmdq *ecmdq @@     got struct arm_smmu_ecmdq [noderef] __percpu * @@
   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse:     expected struct arm_smmu_ecmdq *ecmdq
   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse:     got struct arm_smmu_ecmdq [noderef] __percpu *
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: sparse: incorrect type in initializer (different address spaces) @@     expected void const [noderef] __percpu *__vpp_verify @@     got struct arm_smmu_ecmdq * @@
   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse:     expected void const [noderef] __percpu *__vpp_verify
   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse:     got struct arm_smmu_ecmdq *
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void const *addr @@     got void [noderef] __iomem *base @@
   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse:     expected void const *addr
   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse:     got void [noderef] __iomem *base
   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c: note: in included file (through arch/arm64/include/asm/atomic.h, include/linux/atomic.h, include/asm-generic/bitops/atomic.h, ...):
   arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
   arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)

vim +3573 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c

  3566	
  3567	static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
  3568	{
  3569		int cpu;
  3570		struct arm_smmu_ecmdq *ecmdq;
  3571	
  3572		if (num_possible_cpus() <= smmu->nr_ecmdq) {
> 3573			ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
  3574			if (!ecmdq)
  3575				return -ENOMEM;
  3576	
  3577			for_each_possible_cpu(cpu)
> 3578				*per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
  3579	
  3580			/* A core requires at most one ECMDQ */
  3581			smmu->nr_ecmdq = num_possible_cpus();
  3582	
  3583			return 0;
  3584		}
  3585	
  3586		return -ENOSPC;
  3587	}
  3588	
  3589	static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
  3590	{
  3591		int ret, cpu;
  3592		u32 i, nump, numq, gap;
  3593		u32 reg, shift_increment;
  3594		u64 addr, smmu_dma_base;
  3595		void __iomem *cp_regs, *cp_base;
  3596	
  3597		/* IDR6 */
  3598		reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
  3599		nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
  3600		numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
  3601		smmu->nr_ecmdq = nump * numq;
  3602		gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
  3603	
> 3604		smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
  3605		cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
  3606		if (!cp_regs)
  3607			return -ENOMEM;
  3608	
  3609		for (i = 0; i < nump; i++) {
  3610			u64 val, pre_addr;
  3611	
  3612			val = readq_relaxed(cp_regs + 32 * i);
  3613			if (!(val & ECMDQ_CP_PRESET)) {
  3614				iounmap(cp_regs);
  3615				dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
  3616				return -EFAULT;
  3617			}
  3618	
  3619			if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
  3620				iounmap(cp_regs);
  3621				dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
  3622				return -EFAULT;
  3623			}
  3624	
  3625			pre_addr = val & ECMDQ_CP_ADDR;
  3626		}
  3627	
  3628		addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
  3629		iounmap(cp_regs);
  3630	
  3631		cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
  3632		if (!cp_base)
  3633			return -ENOMEM;
  3634	
  3635		smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
  3636		if (!smmu->ecmdq)
  3637			return -ENOMEM;
  3638	
  3639		ret = arm_smmu_ecmdq_layout(smmu);
  3640		if (ret)
  3641			return ret;
  3642	
  3643		shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
  3644	
  3645		addr = 0;
  3646		for_each_possible_cpu(cpu) {
  3647			struct arm_smmu_ecmdq *ecmdq;
  3648			struct arm_smmu_queue *q;
  3649	
  3650			ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
  3651			ecmdq->base = cp_base + addr;
  3652	
  3653			q = &ecmdq->cmdq.q;
  3654	
  3655			q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
  3656			ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
  3657					ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
  3658			if (ret)
  3659				return ret;
  3660	
  3661			q->ecmdq_prod = ECMDQ_PROD_EN;
  3662			rwlock_init(&q->ecmdq_lock);
  3663	
  3664			ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
  3665			if (ret) {
  3666				dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
  3667				return ret;
  3668			}
  3669	
  3670			addr += gap;
  3671		}
  3672	
  3673		return 0;
  3674	}
  3675
  
Leizhen (ThunderTown) July 27, 2023, 12:16 p.m. UTC | #2
On 2023/7/27 19:13, kernel test robot wrote:
> Hi Tanmay,
> 
> kernel test robot noticed the following build warnings:
> 
> [auto build test WARNING on v6.5-rc2]
> [also build test WARNING on linus/master next-20230727]
> [cannot apply to joro-iommu/next]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/Tanmay-Jagdale/iommu-arm-smmu-v3-Add-support-for-ECMDQ-register-mode/20230721-143913
> base:   v6.5-rc2
> patch link:    https://lore.kernel.org/r/20230721063513.33431-2-tanmay%40marvell.com
> patch subject: [RESEND PATCH 1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
> config: arm64-randconfig-r083-20230726 (https://download.01.org/0day-ci/archive/20230727/202307271849.sqXiM3CK-lkp@intel.com/config)
> compiler: aarch64-linux-gcc (GCC) 12.3.0
> reproduce: (https://download.01.org/0day-ci/archive/20230727/202307271849.sqXiM3CK-lkp@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202307271849.sqXiM3CK-lkp@intel.com/
> 
> sparse warnings: (new ones prefixed by >>)
>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: sparse: incorrect type in assignment (different address spaces) @@     expected struct arm_smmu_ecmdq *ecmdq @@     got struct arm_smmu_ecmdq [noderef] __percpu * @@
>    drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse:     expected struct arm_smmu_ecmdq *ecmdq
>    drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse:     got struct arm_smmu_ecmdq [noderef] __percpu *

OK, thanks, the local variable 'ecmdq' need modifier '__percpu'.

>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: sparse: incorrect type in initializer (different address spaces) @@     expected void const [noderef] __percpu *__vpp_verify @@     got struct arm_smmu_ecmdq * @@
>    drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse:     expected void const [noderef] __percpu *__vpp_verify
>    drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse:     got struct arm_smmu_ecmdq *
>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: sparse: incorrect type in argument 1 (different address spaces) @@     expected void const *addr @@     got void [noderef] __iomem *base @@
>    drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse:     expected void const *addr
>    drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse:     got void [noderef] __iomem *base

Perhaps it would be better to add a member to the structure arm_smmu_device
to record the start physical address of the smmu.

>    drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c: note: in included file (through arch/arm64/include/asm/atomic.h, include/linux/atomic.h, include/asm-generic/bitops/atomic.h, ...):
>    arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
>    arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
> 
> vim +3573 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> 
>   3566	
>   3567	static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
>   3568	{
>   3569		int cpu;
>   3570		struct arm_smmu_ecmdq *ecmdq;
>   3571	
>   3572		if (num_possible_cpus() <= smmu->nr_ecmdq) {
>> 3573			ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
>   3574			if (!ecmdq)
>   3575				return -ENOMEM;
>   3576	
>   3577			for_each_possible_cpu(cpu)
>> 3578				*per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
>   3579	
>   3580			/* A core requires at most one ECMDQ */
>   3581			smmu->nr_ecmdq = num_possible_cpus();
>   3582	
>   3583			return 0;
>   3584		}
>   3585	
>   3586		return -ENOSPC;
>   3587	}
>   3588	
>   3589	static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
>   3590	{
>   3591		int ret, cpu;
>   3592		u32 i, nump, numq, gap;
>   3593		u32 reg, shift_increment;
>   3594		u64 addr, smmu_dma_base;
>   3595		void __iomem *cp_regs, *cp_base;
>   3596	
>   3597		/* IDR6 */
>   3598		reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
>   3599		nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
>   3600		numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
>   3601		smmu->nr_ecmdq = nump * numq;
>   3602		gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
>   3603	
>> 3604		smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
>   3605		cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
>   3606		if (!cp_regs)
>   3607			return -ENOMEM;
>   3608	
>   3609		for (i = 0; i < nump; i++) {
>   3610			u64 val, pre_addr;
>   3611	
>   3612			val = readq_relaxed(cp_regs + 32 * i);
>   3613			if (!(val & ECMDQ_CP_PRESET)) {
>   3614				iounmap(cp_regs);
>   3615				dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
>   3616				return -EFAULT;
>   3617			}
>   3618	
>   3619			if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
>   3620				iounmap(cp_regs);
>   3621				dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
>   3622				return -EFAULT;
>   3623			}
>   3624	
>   3625			pre_addr = val & ECMDQ_CP_ADDR;
>   3626		}
>   3627	
>   3628		addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
>   3629		iounmap(cp_regs);
>   3630	
>   3631		cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
>   3632		if (!cp_base)
>   3633			return -ENOMEM;
>   3634	
>   3635		smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
>   3636		if (!smmu->ecmdq)
>   3637			return -ENOMEM;
>   3638	
>   3639		ret = arm_smmu_ecmdq_layout(smmu);
>   3640		if (ret)
>   3641			return ret;
>   3642	
>   3643		shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
>   3644	
>   3645		addr = 0;
>   3646		for_each_possible_cpu(cpu) {
>   3647			struct arm_smmu_ecmdq *ecmdq;
>   3648			struct arm_smmu_queue *q;
>   3649	
>   3650			ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
>   3651			ecmdq->base = cp_base + addr;
>   3652	
>   3653			q = &ecmdq->cmdq.q;
>   3654	
>   3655			q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
>   3656			ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
>   3657					ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
>   3658			if (ret)
>   3659				return ret;
>   3660	
>   3661			q->ecmdq_prod = ECMDQ_PROD_EN;
>   3662			rwlock_init(&q->ecmdq_lock);
>   3663	
>   3664			ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
>   3665			if (ret) {
>   3666				dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
>   3667				return ret;
>   3668			}
>   3669	
>   3670			addr += gap;
>   3671		}
>   3672	
>   3673		return 0;
>   3674	}
>   3675	
>
  
kernel test robot July 28, 2023, 8:18 a.m. UTC | #3
Hi Tanmay,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.5-rc2]
[also build test WARNING on linus/master next-20230728]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Tanmay-Jagdale/iommu-arm-smmu-v3-Add-support-for-ECMDQ-register-mode/20230721-143913
base:   v6.5-rc2
patch link:    https://lore.kernel.org/r/20230721063513.33431-2-tanmay%40marvell.com
patch subject: [RESEND PATCH 1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
config: arm64-randconfig-r032-20230727 (https://download.01.org/0day-ci/archive/20230728/202307281657.K3A7kRax-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce: (https://download.01.org/0day-ci/archive/20230728/202307281657.K3A7kRax-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202307281657.K3A7kRax-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3619:39: warning: variable 'pre_addr' is uninitialized when used here [-Wuninitialized]
    3619 |                 if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
         |                                                     ^~~~~~~~
   include/linux/compiler.h:55:47: note: expanded from macro 'if'
      55 | #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
         |                                               ^~~~
   include/linux/compiler.h:57:52: note: expanded from macro '__trace_if_var'
      57 | #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
         |                                                    ^~~~
   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3610:20: note: initialize the variable 'pre_addr' to silence this warning
    3610 |                 u64 val, pre_addr;
         |                                  ^
         |                                   = 0
   1 warning generated.


vim +/pre_addr +3619 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c

  3588	
  3589	static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
  3590	{
  3591		int ret, cpu;
  3592		u32 i, nump, numq, gap;
  3593		u32 reg, shift_increment;
  3594		u64 addr, smmu_dma_base;
  3595		void __iomem *cp_regs, *cp_base;
  3596	
  3597		/* IDR6 */
  3598		reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
  3599		nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
  3600		numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
  3601		smmu->nr_ecmdq = nump * numq;
  3602		gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
  3603	
  3604		smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
  3605		cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
  3606		if (!cp_regs)
  3607			return -ENOMEM;
  3608	
  3609		for (i = 0; i < nump; i++) {
  3610			u64 val, pre_addr;
  3611	
  3612			val = readq_relaxed(cp_regs + 32 * i);
  3613			if (!(val & ECMDQ_CP_PRESET)) {
  3614				iounmap(cp_regs);
  3615				dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
  3616				return -EFAULT;
  3617			}
  3618	
> 3619			if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
  3620				iounmap(cp_regs);
  3621				dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
  3622				return -EFAULT;
  3623			}
  3624	
  3625			pre_addr = val & ECMDQ_CP_ADDR;
  3626		}
  3627	
  3628		addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
  3629		iounmap(cp_regs);
  3630	
  3631		cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
  3632		if (!cp_base)
  3633			return -ENOMEM;
  3634	
  3635		smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
  3636		if (!smmu->ecmdq)
  3637			return -ENOMEM;
  3638	
  3639		ret = arm_smmu_ecmdq_layout(smmu);
  3640		if (ret)
  3641			return ret;
  3642	
  3643		shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
  3644	
  3645		addr = 0;
  3646		for_each_possible_cpu(cpu) {
  3647			struct arm_smmu_ecmdq *ecmdq;
  3648			struct arm_smmu_queue *q;
  3649	
  3650			ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
  3651			ecmdq->base = cp_base + addr;
  3652	
  3653			q = &ecmdq->cmdq.q;
  3654	
  3655			q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
  3656			ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
  3657					ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
  3658			if (ret)
  3659				return ret;
  3660	
  3661			q->ecmdq_prod = ECMDQ_PROD_EN;
  3662			rwlock_init(&q->ecmdq_lock);
  3663	
  3664			ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
  3665			if (ret) {
  3666				dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
  3667				return ret;
  3668			}
  3669	
  3670			addr += gap;
  3671		}
  3672	
  3673		return 0;
  3674	}
  3675
  

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 9b0dc3505601..dfb5bf8cbcf9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -347,6 +347,14 @@  static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 
 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
 {
+	if (smmu->ecmdq_enabled) {
+		struct arm_smmu_ecmdq *ecmdq;
+
+		ecmdq = *this_cpu_ptr(smmu->ecmdq);
+
+		return &ecmdq->cmdq;
+	}
+
 	return &smmu->cmdq;
 }
 
@@ -429,6 +437,38 @@  static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
 }
 
+static void arm_smmu_ecmdq_skip_err(struct arm_smmu_device *smmu)
+{
+	int i;
+	u32 prod, cons;
+	struct arm_smmu_queue *q;
+	struct arm_smmu_ecmdq *ecmdq;
+
+	for (i = 0; i < smmu->nr_ecmdq; i++) {
+		unsigned long flags;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdq, i);
+		q = &ecmdq->cmdq.q;
+
+		prod = readl_relaxed(q->prod_reg);
+		cons = readl_relaxed(q->cons_reg);
+		if (((prod ^ cons) & ECMDQ_CONS_ERR) == 0)
+			continue;
+
+		__arm_smmu_cmdq_skip_err(smmu, q);
+
+		write_lock_irqsave(&q->ecmdq_lock, flags);
+		q->ecmdq_prod &= ~ECMDQ_PROD_ERRACK;
+		q->ecmdq_prod |= cons & ECMDQ_CONS_ERR;
+
+		prod = readl_relaxed(q->prod_reg);
+		prod &= ~ECMDQ_PROD_ERRACK;
+		prod |= cons & ECMDQ_CONS_ERR;
+		writel(prod, q->prod_reg);
+		write_unlock_irqrestore(&q->ecmdq_lock, flags);
+	}
+}
+
 /*
  * Command queue locking.
  * This is a form of bastardised rwlock with the following major changes:
@@ -825,7 +865,13 @@  static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 		 * d. Advance the hardware prod pointer
 		 * Control dependency ordering from the entries becoming valid.
 		 */
-		writel_relaxed(prod, cmdq->q.prod_reg);
+		if (smmu->ecmdq_enabled) {
+			read_lock(&cmdq->q.ecmdq_lock);
+			writel_relaxed(prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+			read_unlock(&cmdq->q.ecmdq_lock);
+		} else {
+			writel_relaxed(prod, cmdq->q.prod_reg);
+		}
 
 		/*
 		 * e. Tell the next owner we're done
@@ -1701,6 +1747,9 @@  static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 	if (active & GERROR_CMDQ_ERR)
 		arm_smmu_cmdq_skip_err(smmu);
 
+	if (active & GERROR_CMDQP_ERR)
+		arm_smmu_ecmdq_skip_err(smmu);
+
 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
 	return IRQ_HANDLED;
 }
@@ -2957,6 +3006,20 @@  static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
 	return 0;
 }
 
+static int arm_smmu_ecmdq_init(struct arm_smmu_cmdq *cmdq)
+{
+	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+
+	atomic_set(&cmdq->owner_prod, 0);
+	atomic_set(&cmdq->lock, 0);
+
+	cmdq->valid_map = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
+	if (!cmdq->valid_map)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 {
 	int ret;
@@ -3307,6 +3370,7 @@  static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
 
 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 {
+	int i;
 	int ret;
 	u32 reg, enables;
 	struct arm_smmu_cmdq_ent cmd;
@@ -3351,6 +3415,28 @@  static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
 
+	for (i = 0; i < smmu->nr_ecmdq; i++) {
+		struct arm_smmu_ecmdq *ecmdq;
+		struct arm_smmu_queue *q;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdq, i);
+		q = &ecmdq->cmdq.q;
+
+		writeq_relaxed(q->q_base, ecmdq->base + ARM_SMMU_ECMDQ_BASE);
+		writel_relaxed(q->llq.prod, ecmdq->base + ARM_SMMU_ECMDQ_PROD);
+		writel_relaxed(q->llq.cons, ecmdq->base + ARM_SMMU_ECMDQ_CONS);
+
+		/* enable ecmdq */
+		writel(ECMDQ_PROD_EN, q->prod_reg);
+		ret = readl_relaxed_poll_timeout(q->cons_reg, reg, reg & ECMDQ_CONS_ENACK,
+					  1, ARM_SMMU_POLL_TIMEOUT_US);
+		if (ret) {
+			dev_err(smmu->dev, "ecmdq[%d] enable failed\n", i);
+			smmu->ecmdq_enabled = 0;
+			break;
+		}
+	}
+
 	enables = CR0_CMDQEN;
 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
 				      ARM_SMMU_CR0ACK);
@@ -3476,6 +3562,115 @@  static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
 		}
 		break;
 	}
+};
+
+static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
+{
+	int cpu;
+	struct arm_smmu_ecmdq *ecmdq;
+
+	if (num_possible_cpus() <= smmu->nr_ecmdq) {
+		ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
+		if (!ecmdq)
+			return -ENOMEM;
+
+		for_each_possible_cpu(cpu)
+			*per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
+
+		/* A core requires at most one ECMDQ */
+		smmu->nr_ecmdq = num_possible_cpus();
+
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
+{
+	int ret, cpu;
+	u32 i, nump, numq, gap;
+	u32 reg, shift_increment;
+	u64 addr, smmu_dma_base;
+	void __iomem *cp_regs, *cp_base;
+
+	/* IDR6 */
+	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
+	nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
+	numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
+	smmu->nr_ecmdq = nump * numq;
+	gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
+
+	smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
+	cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
+	if (!cp_regs)
+		return -ENOMEM;
+
+	for (i = 0; i < nump; i++) {
+		u64 val, pre_addr;
+
+		val = readq_relaxed(cp_regs + 32 * i);
+		if (!(val & ECMDQ_CP_PRESET)) {
+			iounmap(cp_regs);
+			dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
+			return -EFAULT;
+		}
+
+		if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
+			iounmap(cp_regs);
+			dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
+			return -EFAULT;
+		}
+
+		pre_addr = val & ECMDQ_CP_ADDR;
+	}
+
+	addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
+	iounmap(cp_regs);
+
+	cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
+	if (!cp_base)
+		return -ENOMEM;
+
+	smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
+	if (!smmu->ecmdq)
+		return -ENOMEM;
+
+	ret = arm_smmu_ecmdq_layout(smmu);
+	if (ret)
+		return ret;
+
+	shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
+
+	addr = 0;
+	for_each_possible_cpu(cpu) {
+		struct arm_smmu_ecmdq *ecmdq;
+		struct arm_smmu_queue *q;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
+		ecmdq->base = cp_base + addr;
+
+		q = &ecmdq->cmdq.q;
+
+		q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
+		ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
+				ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
+		if (ret)
+			return ret;
+
+		q->ecmdq_prod = ECMDQ_PROD_EN;
+		rwlock_init(&q->ecmdq_lock);
+
+		ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
+		if (ret) {
+			dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
+			return ret;
+		}
+
+		addr += gap;
+	}
+
+	return 0;
 }
 
 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
@@ -3588,6 +3783,9 @@  static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		return -ENXIO;
 	}
 
+	if (reg & IDR1_ECMDQ)
+		smmu->features |= ARM_SMMU_FEAT_ECMDQ;
+
 	/* Queue sizes, capped to ensure natural alignment */
 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
 					     FIELD_GET(IDR1_CMDQS, reg));
@@ -3695,6 +3893,16 @@  static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 
 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
 		 smmu->ias, smmu->oas, smmu->features);
+
+	if (smmu->features & ARM_SMMU_FEAT_ECMDQ) {
+		int err;
+
+		err = arm_smmu_ecmdq_probe(smmu);
+		if (err) {
+			dev_err(smmu->dev, "suppress ecmdq feature, errno=%d\n", err);
+			smmu->ecmdq_enabled = 0;
+		}
+	}
 	return 0;
 }
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index dcab85698a4e..1f8777817e31 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -41,6 +41,7 @@ 
 #define IDR0_S2P			(1 << 0)
 
 #define ARM_SMMU_IDR1			0x4
+#define IDR1_ECMDQ			(1 << 31)
 #define IDR1_TABLES_PRESET		(1 << 30)
 #define IDR1_QUEUES_PRESET		(1 << 29)
 #define IDR1_REL			(1 << 28)
@@ -113,6 +114,7 @@ 
 #define ARM_SMMU_IRQ_CTRLACK		0x54
 
 #define ARM_SMMU_GERROR			0x60
+#define GERROR_CMDQP_ERR		(1 << 9)
 #define GERROR_SFM_ERR			(1 << 8)
 #define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
 #define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
@@ -158,6 +160,26 @@ 
 #define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
 
+#define ARM_SMMU_IDR6			0x190
+#define IDR6_LOG2NUMP			GENMASK(27, 24)
+#define IDR6_LOG2NUMQ			GENMASK(19, 16)
+#define IDR6_BA_DOORBELLS		GENMASK(9, 0)
+
+#define ARM_SMMU_ECMDQ_BASE		0x00
+#define ARM_SMMU_ECMDQ_PROD		0x08
+#define ARM_SMMU_ECMDQ_CONS		0x0c
+#define ECMDQ_MAX_SZ_SHIFT		8
+#define ECMDQ_PROD_EN			(1 << 31)
+#define ECMDQ_CONS_ENACK		(1 << 31)
+#define ECMDQ_CONS_ERR			(1 << 23)
+#define ECMDQ_PROD_ERRACK		(1 << 23)
+
+#define ARM_SMMU_ECMDQ_CP_BASE		0x4000
+#define ECMDQ_CP_ADDR			GENMASK_ULL(51, 12)
+#define ECMDQ_CP_CMDQGS			GENMASK_ULL(2, 1)
+#define ECMDQ_CP_PRESET			(1UL << 0)
+#define ECMDQ_CP_RRESET_SIZE		0x10000
+
 #define ARM_SMMU_REG_SZ			0xe00
 
 /* Common MSI config fields */
@@ -527,6 +549,8 @@  struct arm_smmu_ll_queue {
 struct arm_smmu_queue {
 	struct arm_smmu_ll_queue	llq;
 	int				irq; /* Wired interrupt */
+	u32				ecmdq_prod;
+	rwlock_t			ecmdq_lock;
 
 	__le64				*base;
 	dma_addr_t			base_dma;
@@ -552,6 +576,11 @@  struct arm_smmu_cmdq {
 	atomic_t			lock;
 };
 
+struct arm_smmu_ecmdq {
+	struct arm_smmu_cmdq		cmdq;
+	void __iomem			*base;
+};
+
 struct arm_smmu_cmdq_batch {
 	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
 	int				num;
@@ -646,6 +675,7 @@  struct arm_smmu_device {
 #define ARM_SMMU_FEAT_SVA		(1 << 17)
 #define ARM_SMMU_FEAT_E2H		(1 << 18)
 #define ARM_SMMU_FEAT_NESTING		(1 << 19)
+#define ARM_SMMU_FEAT_ECMDQ		(1 << 20)
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -654,6 +684,12 @@  struct arm_smmu_device {
 #define ARM_SMMU_OPT_CMDQ_FORCE_SYNC	(1 << 3)
 	u32				options;
 
+	union {
+		u32			nr_ecmdq;
+		u32			ecmdq_enabled;
+	};
+	struct arm_smmu_ecmdq *__percpu	*ecmdq;
+
 	struct arm_smmu_cmdq		cmdq;
 	struct arm_smmu_evtq		evtq;
 	struct arm_smmu_priq		priq;