iommu/smmu-v3: Refactor smmu stream data structure

Message ID TYTP286MB3564A9E3854CACDEE366D59DCAC9A@TYTP286MB3564.JPNP286.PROD.OUTLOOK.COM
State New
Headers
Series iommu/smmu-v3: Refactor smmu stream data structure |

Commit Message

Dawei Li Oct. 6, 2023, 2:56 p.m. UTC
  The relationship between stream ID and stream can be abstracted in K-V
mappings, in which stream is identified uniquely by ID, but with notes
below:

<1> The number of streams is varied.
<2> The stream IDs are not necessarily contigeous, they can be sparsely
    ranged.

Xarray is a data structure which can deal with interger-pointer mapping
in cache-friendly way.

Compared with current rbtree implementation, xarray are more handy for
index-orinted operations:
<1> Find/locate an entry based on a key  //xa_load
<2> Associating a key with an entry. //xa_insert

Rbtree version for ops above will be cumbersome, for it is implementer's
duty to specify full logic of lookup, such as key comparision and so on.

As such, re-implement mapping between stream ID and streams from rbtree to
xarray.

Signed-off-by: Dawei Li <set_pte_at@outlook.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 57 ++++++---------------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +-
 2 files changed, 16 insertions(+), 44 deletions(-)
  

Comments

Robin Murphy Oct. 6, 2023, 4:45 p.m. UTC | #1
On 2023-10-06 15:56, Dawei Li wrote:
> The relationship between stream ID and stream can be abstracted in K-V
> mappings, in which stream is identified uniquely by ID, but with notes
> below:
> 
> <1> The number of streams is varied.
> <2> The stream IDs are not necessarily contigeous, they can be sparsely
>      ranged.
> 
> Xarray is a data structure which can deal with interger-pointer mapping
> in cache-friendly way.

Cache effects shouldn't really matter here, however last time it came 
up[1] there was a question of memory consumption increasing. Have you 
tested the difference in performance and memory overhead, and if so, on 
what size of system?

> Compared with current rbtree implementation, xarray are more handy for
> index-orinted operations:
> <1> Find/locate an entry based on a key  //xa_load
> <2> Associating a key with an entry. //xa_insert
> 
> Rbtree version for ops above will be cumbersome, for it is implementer's

"will be" ? AFAICS it's already implemented, and judging by the diffstat 
below it wasn't all that bad really ;)

> duty to specify full logic of lookup, such as key comparision and so on.
> 
> As such, re-implement mapping between stream ID and streams from rbtree to
> xarray.
> 
> Signed-off-by: Dawei Li <set_pte_at@outlook.com>
> ---
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 57 ++++++---------------
>   drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  3 +-
>   2 files changed, 16 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index bd0a596f9863..31cf1c0d0a88 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -489,7 +489,6 @@ static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
>   	local_irq_restore(flags);					\
>   })
>   
> -
>   /*
>    * Command queue insertion.
>    * This is made fiddly by our attempts to achieve some sort of scalability
> @@ -1446,23 +1445,13 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
>   static struct arm_smmu_master *
>   arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
>   {
> -	struct rb_node *node;
>   	struct arm_smmu_stream *stream;
>   
>   	lockdep_assert_held(&smmu->streams_mutex);
>   
> -	node = smmu->streams.rb_node;
> -	while (node) {
> -		stream = rb_entry(node, struct arm_smmu_stream, node);
> -		if (stream->id < sid)
> -			node = node->rb_right;
> -		else if (stream->id > sid)
> -			node = node->rb_left;
> -		else
> -			return stream->master;
> -	}
> +	stream = xa_load(&smmu->streams, sid);
>   
> -	return NULL;
> +	return stream ? stream->master : NULL;
>   }
>   
>   /* IRQ and event handlers */
> @@ -2573,11 +2562,10 @@ static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
>   static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
>   				  struct arm_smmu_master *master)
>   {
> -	int i;
> -	int ret = 0;
> -	struct arm_smmu_stream *new_stream, *cur_stream;
> -	struct rb_node **new_node, *parent_node = NULL;
>   	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
> +	struct arm_smmu_stream *new_stream;
> +	int ret = 0;
> +	int i;
>   
>   	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
>   				  GFP_KERNEL);
> @@ -2597,34 +2585,18 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
>   		if (ret)
>   			break;
>   
> -		/* Insert into SID tree */
> -		new_node = &(smmu->streams.rb_node);
> -		while (*new_node) {
> -			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
> -					      node);
> -			parent_node = *new_node;
> -			if (cur_stream->id > new_stream->id) {
> -				new_node = &((*new_node)->rb_left);
> -			} else if (cur_stream->id < new_stream->id) {
> -				new_node = &((*new_node)->rb_right);
> -			} else {
> -				dev_warn(master->dev,
> -					 "stream %u already in tree\n",
> -					 cur_stream->id);
> -				ret = -EINVAL;
> -				break;
> -			}
> -		}
> -		if (ret)
> +		ret = xa_insert(&smmu->streams, sid, new_stream, GFP_KERNEL);
> +		if (ret) {
> +			if (ret == -EBUSY)
> +				dev_warn(master->dev, "stream %u already binded\n",

Nit: "bound", however I think "in use" would be even better, to avoid 
confusion with SVA terminology.

Thanks,
Robin.

[1] 
https://lore.kernel.org/linux-iommu/ecb3725c-27c4-944b-b42c-f4e293521f94@arm.com/

> +					 sid);
>   			break;
> -
> -		rb_link_node(&new_stream->node, parent_node, new_node);
> -		rb_insert_color(&new_stream->node, &smmu->streams);
> +		}
>   	}
>   
>   	if (ret) {
>   		for (i--; i >= 0; i--)
> -			rb_erase(&master->streams[i].node, &smmu->streams);
> +			xa_erase(&smmu->streams, master->streams[i].id);
>   		kfree(master->streams);
>   	}
>   	mutex_unlock(&smmu->streams_mutex);
> @@ -2643,7 +2615,7 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
>   
>   	mutex_lock(&smmu->streams_mutex);
>   	for (i = 0; i < fwspec->num_ids; i++)
> -		rb_erase(&master->streams[i].node, &smmu->streams);
> +		xa_erase(&smmu->streams, master->streams[i].id);
>   	mutex_unlock(&smmu->streams_mutex);
>   
>   	kfree(master->streams);
> @@ -3097,7 +3069,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
>   	int ret;
>   
>   	mutex_init(&smmu->streams_mutex);
> -	smmu->streams = RB_ROOT;
> +	xa_init(&smmu->streams);
>   
>   	ret = arm_smmu_init_queues(smmu);
>   	if (ret)
> @@ -3913,6 +3885,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev)
>   	arm_smmu_device_disable(smmu);
>   	iopf_queue_free(smmu->evtq.iopf);
>   	ida_destroy(&smmu->vmid_map);
> +	xa_destroy(&smmu->streams);
>   }
>   
>   static void arm_smmu_device_shutdown(struct platform_device *pdev)
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 9915850dd4db..f500754d4fbe 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -680,14 +680,13 @@ struct arm_smmu_device {
>   	/* IOMMU core code handle */
>   	struct iommu_device		iommu;
>   
> -	struct rb_root			streams;
> +	struct xarray			streams;
>   	struct mutex			streams_mutex;
>   };
>   
>   struct arm_smmu_stream {
>   	u32				id;
>   	struct arm_smmu_master		*master;
> -	struct rb_node			node;
>   };
>   
>   /* SMMU private data for each master */
  

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index bd0a596f9863..31cf1c0d0a88 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -489,7 +489,6 @@  static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
 	local_irq_restore(flags);					\
 })
 
-
 /*
  * Command queue insertion.
  * This is made fiddly by our attempts to achieve some sort of scalability
@@ -1446,23 +1445,13 @@  static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 static struct arm_smmu_master *
 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
 {
-	struct rb_node *node;
 	struct arm_smmu_stream *stream;
 
 	lockdep_assert_held(&smmu->streams_mutex);
 
-	node = smmu->streams.rb_node;
-	while (node) {
-		stream = rb_entry(node, struct arm_smmu_stream, node);
-		if (stream->id < sid)
-			node = node->rb_right;
-		else if (stream->id > sid)
-			node = node->rb_left;
-		else
-			return stream->master;
-	}
+	stream = xa_load(&smmu->streams, sid);
 
-	return NULL;
+	return stream ? stream->master : NULL;
 }
 
 /* IRQ and event handlers */
@@ -2573,11 +2562,10 @@  static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
 				  struct arm_smmu_master *master)
 {
-	int i;
-	int ret = 0;
-	struct arm_smmu_stream *new_stream, *cur_stream;
-	struct rb_node **new_node, *parent_node = NULL;
 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
+	struct arm_smmu_stream *new_stream;
+	int ret = 0;
+	int i;
 
 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
 				  GFP_KERNEL);
@@ -2597,34 +2585,18 @@  static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
 		if (ret)
 			break;
 
-		/* Insert into SID tree */
-		new_node = &(smmu->streams.rb_node);
-		while (*new_node) {
-			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
-					      node);
-			parent_node = *new_node;
-			if (cur_stream->id > new_stream->id) {
-				new_node = &((*new_node)->rb_left);
-			} else if (cur_stream->id < new_stream->id) {
-				new_node = &((*new_node)->rb_right);
-			} else {
-				dev_warn(master->dev,
-					 "stream %u already in tree\n",
-					 cur_stream->id);
-				ret = -EINVAL;
-				break;
-			}
-		}
-		if (ret)
+		ret = xa_insert(&smmu->streams, sid, new_stream, GFP_KERNEL);
+		if (ret) {
+			if (ret == -EBUSY)
+				dev_warn(master->dev, "stream %u already binded\n",
+					 sid);
 			break;
-
-		rb_link_node(&new_stream->node, parent_node, new_node);
-		rb_insert_color(&new_stream->node, &smmu->streams);
+		}
 	}
 
 	if (ret) {
 		for (i--; i >= 0; i--)
-			rb_erase(&master->streams[i].node, &smmu->streams);
+			xa_erase(&smmu->streams, master->streams[i].id);
 		kfree(master->streams);
 	}
 	mutex_unlock(&smmu->streams_mutex);
@@ -2643,7 +2615,7 @@  static void arm_smmu_remove_master(struct arm_smmu_master *master)
 
 	mutex_lock(&smmu->streams_mutex);
 	for (i = 0; i < fwspec->num_ids; i++)
-		rb_erase(&master->streams[i].node, &smmu->streams);
+		xa_erase(&smmu->streams, master->streams[i].id);
 	mutex_unlock(&smmu->streams_mutex);
 
 	kfree(master->streams);
@@ -3097,7 +3069,7 @@  static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 	int ret;
 
 	mutex_init(&smmu->streams_mutex);
-	smmu->streams = RB_ROOT;
+	xa_init(&smmu->streams);
 
 	ret = arm_smmu_init_queues(smmu);
 	if (ret)
@@ -3913,6 +3885,7 @@  static void arm_smmu_device_remove(struct platform_device *pdev)
 	arm_smmu_device_disable(smmu);
 	iopf_queue_free(smmu->evtq.iopf);
 	ida_destroy(&smmu->vmid_map);
+	xa_destroy(&smmu->streams);
 }
 
 static void arm_smmu_device_shutdown(struct platform_device *pdev)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 9915850dd4db..f500754d4fbe 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -680,14 +680,13 @@  struct arm_smmu_device {
 	/* IOMMU core code handle */
 	struct iommu_device		iommu;
 
-	struct rb_root			streams;
+	struct xarray			streams;
 	struct mutex			streams_mutex;
 };
 
 struct arm_smmu_stream {
 	u32				id;
 	struct arm_smmu_master		*master;
-	struct rb_node			node;
 };
 
 /* SMMU private data for each master */