iommu/arm-smmu-v3: Change vmid alloc strategy from bitmap to ida

Message ID TYCP286MB2323E0C525FF9F94E3B07C7ACA35A@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM
State New
Headers
Series iommu/arm-smmu-v3: Change vmid alloc strategy from bitmap to ida |

Commit Message

Dawei Li July 15, 2023, 4:16 p.m. UTC
  In the current implementation of VMID allocation for arm smmu-v3, a per-smmu
device bitmap of 64K bits (8K bytes) is allocated to cover the possible VMID
range, which is two pages on some architectures. Besides that, its memory
consumption is 'static', regardless of how many VMIDs are actually allocated.

That is memory-inefficient and lacks scalability.

So an IDA-based implementation is introduced to address this issue; it
grows on demand, according to the actual need for VMID allocation.

Signed-off-by: Dawei Li <set_pte_at@outlook.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 29 +++++----------------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  2 +-
 2 files changed, 8 insertions(+), 23 deletions(-)
  

Comments

Jason Gunthorpe July 25, 2023, 1:41 p.m. UTC | #1
On Sun, Jul 16, 2023 at 12:16:21AM +0800, Dawei Li wrote:
> For current implementation of vmid allocation of arm smmu-v3, a per-smmu
> devide bitmap of 64K bits(8K bytes) is allocated on behalf of possible VMID
> range, which is two pages for some architectures. Besides that, its memory
> consumption is 'static', despite of how many VMIDs are allocated actually.

Is there an actual problem here? Allocating a single 8k page at early
boot doesn't seem like a burden? Are there a lot of smmu instances?

> So an IDA based implementation is introduced to address this issue, which
> is capable of self-expanding on the actual need of VMID allocation.

However, I agree IDA is just generally better and generally drivers
shouldn't be open coding it

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
  
Will Deacon Aug. 1, 2023, 8:41 p.m. UTC | #2
On Sun, 16 Jul 2023 00:16:21 +0800, Dawei Li wrote:
> For current implementation of vmid allocation of arm smmu-v3, a per-smmu
> devide bitmap of 64K bits(8K bytes) is allocated on behalf of possible VMID
> range, which is two pages for some architectures. Besides that, its memory
> consumption is 'static', despite of how many VMIDs are allocated actually.
> 
> That's memory inefficient and lack of scalability.
> 
> [...]

Applied to will (for-joerg/arm-smmu/updates), thanks!

[1/1] iommu/arm-smmu-v3: Change vmid alloc strategy from bitmap to ida
      https://git.kernel.org/will/c/1672730cffaf

Cheers,
  

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 9b0dc3505601..d9487602701f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2055,24 +2055,6 @@  static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 	return &smmu_domain->domain;
 }
 
-static int arm_smmu_bitmap_alloc(unsigned long *map, int span)
-{
-	int idx, size = 1 << span;
-
-	do {
-		idx = find_first_zero_bit(map, size);
-		if (idx == size)
-			return -ENOSPC;
-	} while (test_and_set_bit(idx, map));
-
-	return idx;
-}
-
-static void arm_smmu_bitmap_free(unsigned long *map, int idx)
-{
-	clear_bit(idx, map);
-}
-
 static void arm_smmu_domain_free(struct iommu_domain *domain)
 {
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -2093,7 +2075,7 @@  static void arm_smmu_domain_free(struct iommu_domain *domain)
 	} else {
 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
 		if (cfg->vmid)
-			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
+			ida_free(&smmu->vmid_map, cfg->vmid);
 	}
 
 	kfree(smmu_domain);
@@ -2167,7 +2149,9 @@  static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
 
-	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
+	/* Reserve VMID 0 for stage-2 bypass STEs */
+	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
+			       GFP_KERNEL);
 	if (vmid < 0)
 		return vmid;
 
@@ -3098,8 +3082,8 @@  static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
 	reg |= STRTAB_BASE_RA;
 	smmu->strtab_cfg.strtab_base = reg;
 
-	/* Allocate the first VMID for stage-2 bypass STEs */
-	set_bit(0, smmu->vmid_map);
+	ida_init(&smmu->vmid_map);
+
 	return 0;
 }
 
@@ -3923,6 +3907,7 @@  static void arm_smmu_device_remove(struct platform_device *pdev)
 	iommu_device_sysfs_remove(&smmu->iommu);
 	arm_smmu_device_disable(smmu);
 	iopf_queue_free(smmu->evtq.iopf);
+	ida_destroy(&smmu->vmid_map);
 }
 
 static void arm_smmu_device_shutdown(struct platform_device *pdev)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index dcab85698a4e..9915850dd4db 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -670,7 +670,7 @@  struct arm_smmu_device {
 
 #define ARM_SMMU_MAX_VMIDS		(1 << 16)
 	unsigned int			vmid_bits;
-	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
+	struct ida			vmid_map;
 
 	unsigned int			ssid_bits;
 	unsigned int			sid_bits;