[RESEND,1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
Commit Message
From: Zhen Lei <thunder.leizhen@huawei.com>
Ensure that each core exclusively occupies an ECMDQ and that all ECMDQs are
enabled during initialization. Any error during this initialization results
in a fallback to the normal CMDQ.
When GERROR is triggered by an ECMDQ, all ECMDQs need to be traversed: the
ECMDQs with errors are processed and those without errors are skipped.
Compared with register SMMU_CMDQ_PROD, register SMMU_ECMDQ_PROD has two
extra bits: 'EN' and 'ERRACK'. Therefore, an extra member 'ecmdq_prod' is
added to record the values of these two bits, and it is ORed in each time
register SMMU_ECMDQ_PROD is updated. After the error indicated by
SMMU_GERROR.CMDQP_ERR is fixed, the 'ERRACK' bit needs to be toggled to
resume the corresponding ECMDQ. Therefore, an rwlock is used to protect the
write to the 'ERRACK' bit during error handling and the read of it during
command insertion.
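In sketch form, the two sides of that rwlock pair up as follows (field
names as introduced by this patch; see the diff below for the real code):

	/* Command insertion (hot path, may run on many CPUs at once):
	 * the current EN/ERRACK state is ORed into every PROD update
	 * under the read lock. */
	read_lock(&q->ecmdq_lock);
	writel_relaxed(prod | q->ecmdq_prod, q->prod_reg);
	read_unlock(&q->ecmdq_lock);

	/* Error handling (rare): ERRACK is toggled under the write lock,
	 * so no concurrent insertion can publish a stale ERRACK value. */
	write_lock_irqsave(&q->ecmdq_lock, flags);
	q->ecmdq_prod &= ~ECMDQ_PROD_ERRACK;
	q->ecmdq_prod |= cons & ECMDQ_CONS_ERR;
	write_unlock_irqrestore(&q->ecmdq_lock, flags);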
Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 210 +++++++++++++++++++-
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 36 ++++
2 files changed, 245 insertions(+), 1 deletion(-)
Comments
Hi Tanmay,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.5-rc2]
[also build test WARNING on linus/master next-20230727]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Tanmay-Jagdale/iommu-arm-smmu-v3-Add-support-for-ECMDQ-register-mode/20230721-143913
base: v6.5-rc2
patch link: https://lore.kernel.org/r/20230721063513.33431-2-tanmay%40marvell.com
patch subject: [RESEND PATCH 1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
config: arm64-randconfig-r083-20230726 (https://download.01.org/0day-ci/archive/20230727/202307271849.sqXiM3CK-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230727/202307271849.sqXiM3CK-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202307271849.sqXiM3CK-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct arm_smmu_ecmdq *ecmdq @@ got struct arm_smmu_ecmdq [noderef] __percpu * @@
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: expected struct arm_smmu_ecmdq *ecmdq
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: got struct arm_smmu_ecmdq [noderef] __percpu *
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void const [noderef] __percpu *__vpp_verify @@ got struct arm_smmu_ecmdq * @@
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: expected void const [noderef] __percpu *__vpp_verify
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: got struct arm_smmu_ecmdq *
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void const *addr @@ got void [noderef] __iomem *base @@
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: expected void const *addr
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: got void [noderef] __iomem *base
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c: note: in included file (through arch/arm64/include/asm/atomic.h, include/linux/atomic.h, include/asm-generic/bitops/atomic.h, ...):
arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
arch/arm64/include/asm/cmpxchg.h:168:1: sparse: sparse: cast truncates bits from constant value (ffffffff80000000 becomes 0)
vim +3573 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
  3566	
  3567	static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
  3568	{
  3569		int cpu;
  3570		struct arm_smmu_ecmdq *ecmdq;
  3571	
  3572		if (num_possible_cpus() <= smmu->nr_ecmdq) {
> 3573			ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
  3574			if (!ecmdq)
  3575				return -ENOMEM;
  3576	
  3577			for_each_possible_cpu(cpu)
> 3578				*per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
  3579	
  3580			/* A core requires at most one ECMDQ */
  3581			smmu->nr_ecmdq = num_possible_cpus();
  3582	
  3583			return 0;
  3584		}
  3585	
  3586		return -ENOSPC;
  3587	}
  3588	
  3589	static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
  3590	{
  3591		int ret, cpu;
  3592		u32 i, nump, numq, gap;
  3593		u32 reg, shift_increment;
  3594		u64 addr, smmu_dma_base;
  3595		void __iomem *cp_regs, *cp_base;
  3596	
  3597		/* IDR6 */
  3598		reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
  3599		nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
  3600		numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
  3601		smmu->nr_ecmdq = nump * numq;
  3602		gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
  3603	
> 3604		smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
  3605		cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
  3606		if (!cp_regs)
  3607			return -ENOMEM;
  3608	
  3609		for (i = 0; i < nump; i++) {
  3610			u64 val, pre_addr;
  3611	
  3612			val = readq_relaxed(cp_regs + 32 * i);
  3613			if (!(val & ECMDQ_CP_PRESET)) {
  3614				iounmap(cp_regs);
  3615				dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
  3616				return -EFAULT;
  3617			}
  3618	
  3619			if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
  3620				iounmap(cp_regs);
  3621				dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
  3622				return -EFAULT;
  3623			}
  3624	
  3625			pre_addr = val & ECMDQ_CP_ADDR;
  3626		}
  3627	
  3628		addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
  3629		iounmap(cp_regs);
  3630	
  3631		cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
  3632		if (!cp_base)
  3633			return -ENOMEM;
  3634	
  3635		smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
  3636		if (!smmu->ecmdq)
  3637			return -ENOMEM;
  3638	
  3639		ret = arm_smmu_ecmdq_layout(smmu);
  3640		if (ret)
  3641			return ret;
  3642	
  3643		shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
  3644	
  3645		addr = 0;
  3646		for_each_possible_cpu(cpu) {
  3647			struct arm_smmu_ecmdq *ecmdq;
  3648			struct arm_smmu_queue *q;
  3649	
  3650			ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
  3651			ecmdq->base = cp_base + addr;
  3652	
  3653			q = &ecmdq->cmdq.q;
  3654	
  3655			q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
  3656			ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
  3657						      ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
  3658			if (ret)
  3659				return ret;
  3660	
  3661			q->ecmdq_prod = ECMDQ_PROD_EN;
  3662			rwlock_init(&q->ecmdq_lock);
  3663	
  3664			ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
  3665			if (ret) {
  3666				dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
  3667				return ret;
  3668			}
  3669	
  3670			addr += gap;
  3671		}
  3672	
  3673		return 0;
  3674	}
  3675	
On 2023/7/27 19:13, kernel test robot wrote:
> [...]
> sparse warnings: (new ones prefixed by >>)
>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct arm_smmu_ecmdq *ecmdq @@ got struct arm_smmu_ecmdq [noderef] __percpu * @@
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: expected struct arm_smmu_ecmdq *ecmdq
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3573:23: sparse: got struct arm_smmu_ecmdq [noderef] __percpu *
OK, thanks. The local variable 'ecmdq' needs the '__percpu' modifier.
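I.e., something like the following (a sketch of the suggested fix, untested):

	static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
	{
		int cpu;
		/* '__percpu' tells sparse which address space the pointer
		 * returned by devm_alloc_percpu() lives in. */
		struct arm_smmu_ecmdq __percpu *ecmdq;

		if (num_possible_cpus() <= smmu->nr_ecmdq) {
			ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
			...

With that annotation, per_cpu_ptr(ecmdq, cpu) at line 3578 type-checks as
well, so both of the first two warnings should go away.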
>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected void const [noderef] __percpu *__vpp_verify @@ got struct arm_smmu_ecmdq * @@
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: expected void const [noderef] __percpu *__vpp_verify
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3578:58: sparse: got struct arm_smmu_ecmdq *
>>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected void const *addr @@ got void [noderef] __iomem *base @@
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: expected void const *addr
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3604:45: sparse: got void [noderef] __iomem *base
Perhaps it would be better to add a member to struct arm_smmu_device that
records the SMMU's start physical address.
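For example (a sketch only; 'base_phys' is a made-up member name):

	/* in struct arm_smmu_device */
	phys_addr_t			base_phys;

	/* in arm_smmu_device_probe(), where the MMIO resource is known */
	smmu->base_phys = res->start;

	/* in arm_smmu_ecmdq_probe(), instead of vmalloc_to_pfn(smmu->base) */
	smmu_dma_base = smmu->base_phys;

That would also stop passing an __iomem pointer to vmalloc_to_pfn(), which
is what sparse is complaining about at line 3604.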
Hi Tanmay,
kernel test robot noticed the following build warnings:
[auto build test WARNING on v6.5-rc2]
[also build test WARNING on linus/master next-20230728]
[cannot apply to joro-iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Tanmay-Jagdale/iommu-arm-smmu-v3-Add-support-for-ECMDQ-register-mode/20230721-143913
base: v6.5-rc2
patch link: https://lore.kernel.org/r/20230721063513.33431-2-tanmay%40marvell.com
patch subject: [RESEND PATCH 1/4] iommu/arm-smmu-v3: Add support for ECMDQ register mode
config: arm64-randconfig-r032-20230727 (https://download.01.org/0day-ci/archive/20230728/202307281657.K3A7kRax-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce: (https://download.01.org/0day-ci/archive/20230728/202307281657.K3A7kRax-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202307281657.K3A7kRax-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3619:39: warning: variable 'pre_addr' is uninitialized when used here [-Wuninitialized]
3619 | if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
| ^~~~~~~~
include/linux/compiler.h:55:47: note: expanded from macro 'if'
55 | #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) )
| ^~~~
include/linux/compiler.h:57:52: note: expanded from macro '__trace_if_var'
57 | #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond))
| ^~~~
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3610:20: note: initialize the variable 'pre_addr' to silence this warning
3610 | u64 val, pre_addr;
| ^
| = 0
1 warning generated.
vim +/pre_addr +3619 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
  3609		for (i = 0; i < nump; i++) {
  3610			u64 val, pre_addr;
  3611	
  3612			val = readq_relaxed(cp_regs + 32 * i);
  3613			if (!(val & ECMDQ_CP_PRESET)) {
  3614				iounmap(cp_regs);
  3615				dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
  3616				return -EFAULT;
  3617			}
  3618	
> 3619			if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
  3620				iounmap(cp_regs);
  3621				dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
  3622				return -EFAULT;
  3623			}
  3624	
  3625			pre_addr = val & ECMDQ_CP_ADDR;
  3626		}
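Note that 'pre_addr' is declared inside the loop body, so the value stored
at line 3625 is a fresh, indeterminate object again by the time line 3619
reads it on the next iteration; the warning points at real undefined
behaviour, not just noise. One way to fix it is to hoist the declaration
out of the loop (sketch, untested):

	u64 val, pre_addr = 0;

	for (i = 0; i < nump; i++) {
		val = readq_relaxed(cp_regs + 32 * i);
		...
		if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
		...
		pre_addr = val & ECMDQ_CP_ADDR;
	}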
@@ -347,6 +347,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
 {
+	if (smmu->ecmdq_enabled) {
+		struct arm_smmu_ecmdq *ecmdq;
+
+		ecmdq = *this_cpu_ptr(smmu->ecmdq);
+
+		return &ecmdq->cmdq;
+	}
+
 	return &smmu->cmdq;
 }
@@ -429,6 +437,38 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
 }
 
+static void arm_smmu_ecmdq_skip_err(struct arm_smmu_device *smmu)
+{
+	int i;
+	u32 prod, cons;
+	struct arm_smmu_queue *q;
+	struct arm_smmu_ecmdq *ecmdq;
+
+	for (i = 0; i < smmu->nr_ecmdq; i++) {
+		unsigned long flags;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdq, i);
+		q = &ecmdq->cmdq.q;
+
+		prod = readl_relaxed(q->prod_reg);
+		cons = readl_relaxed(q->cons_reg);
+		if (((prod ^ cons) & ECMDQ_CONS_ERR) == 0)
+			continue;
+
+		__arm_smmu_cmdq_skip_err(smmu, q);
+
+		write_lock_irqsave(&q->ecmdq_lock, flags);
+		q->ecmdq_prod &= ~ECMDQ_PROD_ERRACK;
+		q->ecmdq_prod |= cons & ECMDQ_CONS_ERR;
+
+		prod = readl_relaxed(q->prod_reg);
+		prod &= ~ECMDQ_PROD_ERRACK;
+		prod |= cons & ECMDQ_CONS_ERR;
+		writel(prod, q->prod_reg);
+		write_unlock_irqrestore(&q->ecmdq_lock, flags);
+	}
+}
+
 /*
  * Command queue locking.
  * This is a form of bastardised rwlock with the following major changes:
@@ -825,7 +865,13 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	 * d. Advance the hardware prod pointer
 	 *    Control dependency ordering from the entries becoming valid.
 	 */
-	writel_relaxed(prod, cmdq->q.prod_reg);
+	if (smmu->ecmdq_enabled) {
+		read_lock(&cmdq->q.ecmdq_lock);
+		writel_relaxed(prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+		read_unlock(&cmdq->q.ecmdq_lock);
+	} else {
+		writel_relaxed(prod, cmdq->q.prod_reg);
+	}
 
 	/*
 	 * e. Tell the next owner we're done
@@ -1701,6 +1747,9 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 	if (active & GERROR_CMDQ_ERR)
 		arm_smmu_cmdq_skip_err(smmu);
 
+	if (active & GERROR_CMDQP_ERR)
+		arm_smmu_ecmdq_skip_err(smmu);
+
 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
 	return IRQ_HANDLED;
 }
@@ -2957,6 +3006,20 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
 	return 0;
 }
 
+static int arm_smmu_ecmdq_init(struct arm_smmu_cmdq *cmdq)
+{
+	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+
+	atomic_set(&cmdq->owner_prod, 0);
+	atomic_set(&cmdq->lock, 0);
+
+	cmdq->valid_map = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
+	if (!cmdq->valid_map)
+		return -ENOMEM;
+
+	return 0;
+}
+
 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 {
 	int ret;
@@ -3307,6 +3370,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 {
+	int i;
 	int ret;
 	u32 reg, enables;
 	struct arm_smmu_cmdq_ent cmd;
@@ -3351,6 +3415,28 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
 
+	for (i = 0; i < smmu->nr_ecmdq; i++) {
+		struct arm_smmu_ecmdq *ecmdq;
+		struct arm_smmu_queue *q;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdq, i);
+		q = &ecmdq->cmdq.q;
+
+		writeq_relaxed(q->q_base, ecmdq->base + ARM_SMMU_ECMDQ_BASE);
+		writel_relaxed(q->llq.prod, ecmdq->base + ARM_SMMU_ECMDQ_PROD);
+		writel_relaxed(q->llq.cons, ecmdq->base + ARM_SMMU_ECMDQ_CONS);
+
+		/* enable ecmdq */
+		writel(ECMDQ_PROD_EN, q->prod_reg);
+		ret = readl_relaxed_poll_timeout(q->cons_reg, reg, reg & ECMDQ_CONS_ENACK,
+						 1, ARM_SMMU_POLL_TIMEOUT_US);
+		if (ret) {
+			dev_err(smmu->dev, "ecmdq[%d] enable failed\n", i);
+			smmu->ecmdq_enabled = 0;
+			break;
+		}
+	}
+
 	enables = CR0_CMDQEN;
 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
 				      ARM_SMMU_CR0ACK);
@@ -3476,6 +3562,115 @@ static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
 		}
 		break;
 	}
+}
+
+static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
+{
+	int cpu;
+	struct arm_smmu_ecmdq *ecmdq;
+
+	if (num_possible_cpus() <= smmu->nr_ecmdq) {
+		ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
+		if (!ecmdq)
+			return -ENOMEM;
+
+		for_each_possible_cpu(cpu)
+			*per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
+
+		/* A core requires at most one ECMDQ */
+		smmu->nr_ecmdq = num_possible_cpus();
+
+		return 0;
+	}
+
+	return -ENOSPC;
+}
+
+static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
+{
+	int ret, cpu;
+	u32 i, nump, numq, gap;
+	u32 reg, shift_increment;
+	u64 addr, smmu_dma_base;
+	void __iomem *cp_regs, *cp_base;
+
+	/* IDR6 */
+	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
+	nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
+	numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
+	smmu->nr_ecmdq = nump * numq;
+	gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
+
+	smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
+	cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
+	if (!cp_regs)
+		return -ENOMEM;
+
+	for (i = 0; i < nump; i++) {
+		u64 val, pre_addr;
+
+		val = readq_relaxed(cp_regs + 32 * i);
+		if (!(val & ECMDQ_CP_PRESET)) {
+			iounmap(cp_regs);
+			dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
+			return -EFAULT;
+		}
+
+		if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
+			iounmap(cp_regs);
+			dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
+			return -EFAULT;
+		}
+
+		pre_addr = val & ECMDQ_CP_ADDR;
+	}
+
+	addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
+	iounmap(cp_regs);
+
+	cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
+	if (!cp_base)
+		return -ENOMEM;
+
+	smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
+	if (!smmu->ecmdq)
+		return -ENOMEM;
+
+	ret = arm_smmu_ecmdq_layout(smmu);
+	if (ret)
+		return ret;
+
+	shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
+
+	addr = 0;
+	for_each_possible_cpu(cpu) {
+		struct arm_smmu_ecmdq *ecmdq;
+		struct arm_smmu_queue *q;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu);
+		ecmdq->base = cp_base + addr;
+
+		q = &ecmdq->cmdq.q;
+
+		q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
+		ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
+					      ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
+		if (ret)
+			return ret;
+
+		q->ecmdq_prod = ECMDQ_PROD_EN;
+		rwlock_init(&q->ecmdq_lock);
+
+		ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
+		if (ret) {
+			dev_err(smmu->dev, "ecmdq[%d] init failed\n", i);
+			return ret;
+		}
+
+		addr += gap;
+	}
+
+	return 0;
 }
 
 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
@@ -3588,6 +3783,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		return -ENXIO;
 	}
 
+	if (reg & IDR1_ECMDQ)
+		smmu->features |= ARM_SMMU_FEAT_ECMDQ;
+
 	/* Queue sizes, capped to ensure natural alignment */
 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
 					     FIELD_GET(IDR1_CMDQS, reg));
@@ -3695,6 +3893,16 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
 		 smmu->ias, smmu->oas, smmu->features);
+
+	if (smmu->features & ARM_SMMU_FEAT_ECMDQ) {
+		int err;
+
+		err = arm_smmu_ecmdq_probe(smmu);
+		if (err) {
+			dev_err(smmu->dev, "suppress ecmdq feature, errno=%d\n", err);
+			smmu->ecmdq_enabled = 0;
+		}
+	}
 
 	return 0;
 }
@@ -41,6 +41,7 @@
 #define IDR0_S2P			(1 << 0)
 
 #define ARM_SMMU_IDR1			0x4
+#define IDR1_ECMDQ			(1 << 31)
 #define IDR1_TABLES_PRESET		(1 << 30)
 #define IDR1_QUEUES_PRESET		(1 << 29)
 #define IDR1_REL			(1 << 28)
@@ -113,6 +114,7 @@
 #define ARM_SMMU_IRQ_CTRLACK		0x54
 
 #define ARM_SMMU_GERROR			0x60
+#define GERROR_CMDQP_ERR		(1 << 9)
 #define GERROR_SFM_ERR			(1 << 8)
 #define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
 #define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
@@ -158,6 +160,26 @@
 #define ARM_SMMU_PRIQ_IRQ_CFG1		0xd8
 #define ARM_SMMU_PRIQ_IRQ_CFG2		0xdc
 
+#define ARM_SMMU_IDR6			0x190
+#define IDR6_LOG2NUMP			GENMASK(27, 24)
+#define IDR6_LOG2NUMQ			GENMASK(19, 16)
+#define IDR6_BA_DOORBELLS		GENMASK(9, 0)
+
+#define ARM_SMMU_ECMDQ_BASE		0x00
+#define ARM_SMMU_ECMDQ_PROD		0x08
+#define ARM_SMMU_ECMDQ_CONS		0x0c
+#define ECMDQ_MAX_SZ_SHIFT		8
+#define ECMDQ_PROD_EN			(1 << 31)
+#define ECMDQ_CONS_ENACK		(1 << 31)
+#define ECMDQ_CONS_ERR			(1 << 23)
+#define ECMDQ_PROD_ERRACK		(1 << 23)
+
+#define ARM_SMMU_ECMDQ_CP_BASE		0x4000
+#define ECMDQ_CP_ADDR			GENMASK_ULL(51, 12)
+#define ECMDQ_CP_CMDQGS			GENMASK_ULL(2, 1)
+#define ECMDQ_CP_PRESET			(1UL << 0)
+#define ECMDQ_CP_RRESET_SIZE		0x10000
+
 #define ARM_SMMU_REG_SZ			0xe00
 
 /* Common MSI config fields */
@@ -527,6 +549,8 @@ struct arm_smmu_ll_queue {
 struct arm_smmu_queue {
 	struct arm_smmu_ll_queue	llq;
 	int				irq; /* Wired interrupt */
+	u32				ecmdq_prod;
+	rwlock_t			ecmdq_lock;
 
 	__le64				*base;
 	dma_addr_t			base_dma;
@@ -552,6 +576,11 @@ struct arm_smmu_cmdq {
 	atomic_t			lock;
 };
 
+struct arm_smmu_ecmdq {
+	struct arm_smmu_cmdq		cmdq;
+	void __iomem			*base;
+};
+
 struct arm_smmu_cmdq_batch {
 	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
 	int				num;
@@ -646,6 +675,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_SVA		(1 << 17)
 #define ARM_SMMU_FEAT_E2H		(1 << 18)
 #define ARM_SMMU_FEAT_NESTING		(1 << 19)
+#define ARM_SMMU_FEAT_ECMDQ		(1 << 20)
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -654,6 +684,12 @@ struct arm_smmu_device {
 #define ARM_SMMU_OPT_CMDQ_FORCE_SYNC	(1 << 3)
 	u32				options;
 
+	union {
+		u32			nr_ecmdq;
+		u32			ecmdq_enabled;
+	};
+	struct arm_smmu_ecmdq *__percpu	*ecmdq;
+
 	struct arm_smmu_cmdq		cmdq;
 	struct arm_smmu_evtq		evtq;
 	struct arm_smmu_priq		priq;
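One detail worth calling out in this last hunk: 'nr_ecmdq' and
'ecmdq_enabled' deliberately share storage, so a nonzero queue count doubles
as the enabled flag. The error paths above write 'smmu->ecmdq_enabled = 0;',
which in a single store makes arm_smmu_get_cmdq() fall back to the normal
CMDQ and turns the 'for (i = 0; i < smmu->nr_ecmdq; i++)' loops into no-ops:

	union {
		u32			nr_ecmdq;	/* set by arm_smmu_ecmdq_probe() */
		u32			ecmdq_enabled;	/* tested by arm_smmu_get_cmdq() */
	};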