iommu/smmu-v3: Refactor smmu stream data structure
Commit Message
The relationship between stream IDs and streams can be abstracted as a K-V
mapping, in which each stream is uniquely identified by its ID, with the
notes below:
<1> The number of streams varies.
<2> The stream IDs are not necessarily contiguous; they can be sparsely
ranged.
XArray is a data structure which handles integer-to-pointer mappings in a
cache-friendly way.
Compared with the current rbtree implementation, xarray is handier for
index-oriented operations:
<1> Finding/locating an entry based on a key. // xa_load
<2> Associating a key with an entry. // xa_insert
The rbtree version of the ops above will be cumbersome, for it is the
implementer's duty to specify the full lookup logic, such as key comparison.
As such, re-implement the mapping between stream IDs and streams using an
xarray instead of an rbtree.
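For illustration, a minimal sketch of the pattern in isolation (generic
kernel code, not taken from this patch; "struct obj" is a hypothetical
example type):

	#include <linux/xarray.h>

	static DEFINE_XARRAY(obj_map);	/* ID -> object mapping */

	/* Associate @id with @o; returns -EBUSY if @id is already in use */
	static int obj_map_add(u32 id, struct obj *o)
	{
		return xa_insert(&obj_map, id, o, GFP_KERNEL);
	}

	/*
	 * Look up the object for @id, or NULL if none; no key comparison
	 * or tree-walk logic needs to be open-coded.
	 */
	static struct obj *obj_map_find(u32 id)
	{
		return xa_load(&obj_map, id);
	}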
Signed-off-by: Dawei Li <set_pte_at@outlook.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 57 ++++++---------------
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +-
2 files changed, 16 insertions(+), 44 deletions(-)
Comments
On 2023-10-06 15:56, Dawei Li wrote:
> The relationship between stream IDs and streams can be abstracted as a K-V
> mapping, in which each stream is uniquely identified by its ID, with the
> notes below:
>
> <1> The number of streams varies.
> <2> The stream IDs are not necessarily contiguous; they can be sparsely
> ranged.
>
> XArray is a data structure which handles integer-to-pointer mappings in a
> cache-friendly way.
Cache effects shouldn't really matter here; however, the last time this came
up[1] there was a question of memory consumption increasing. Have you
tested the difference in performance and memory overhead, and if so, on
what size of system?
> Compared with the current rbtree implementation, xarray is handier for
> index-oriented operations:
> <1> Finding/locating an entry based on a key. // xa_load
> <2> Associating a key with an entry. // xa_insert
>
> The rbtree version of the ops above will be cumbersome, for it is the implementer's
"will be" ? AFAICS it's already implemented, and judging by the diffstat
below it wasn't all that bad really ;)
> duty to specify the full lookup logic, such as key comparison.
>
> As such, re-implement the mapping between stream IDs and streams using an
> xarray instead of an rbtree.
>
> Signed-off-by: Dawei Li <set_pte_at@outlook.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 57 ++++++---------------
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +-
> 2 files changed, 16 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index bd0a596f9863..31cf1c0d0a88 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -489,7 +489,6 @@ static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
> local_irq_restore(flags); \
> })
>
> -
> /*
> * Command queue insertion.
> * This is made fiddly by our attempts to achieve some sort of scalability
> @@ -1446,23 +1445,13 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
> static struct arm_smmu_master *
> arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
> {
> - struct rb_node *node;
> struct arm_smmu_stream *stream;
>
> lockdep_assert_held(&smmu->streams_mutex);
>
> - node = smmu->streams.rb_node;
> - while (node) {
> - stream = rb_entry(node, struct arm_smmu_stream, node);
> - if (stream->id < sid)
> - node = node->rb_right;
> - else if (stream->id > sid)
> - node = node->rb_left;
> - else
> - return stream->master;
> - }
> + stream = xa_load(&smmu->streams, sid);
>
> - return NULL;
> + return stream ? stream->master : NULL;
> }
>
> /* IRQ and event handlers */
> @@ -2573,11 +2562,10 @@ static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
> static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
> struct arm_smmu_master *master)
> {
> - int i;
> - int ret = 0;
> - struct arm_smmu_stream *new_stream, *cur_stream;
> - struct rb_node **new_node, *parent_node = NULL;
> struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
> + struct arm_smmu_stream *new_stream;
> + int ret = 0;
> + int i;
>
> master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
> GFP_KERNEL);
> @@ -2597,34 +2585,18 @@ static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
> if (ret)
> break;
>
> - /* Insert into SID tree */
> - new_node = &(smmu->streams.rb_node);
> - while (*new_node) {
> - cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
> - node);
> - parent_node = *new_node;
> - if (cur_stream->id > new_stream->id) {
> - new_node = &((*new_node)->rb_left);
> - } else if (cur_stream->id < new_stream->id) {
> - new_node = &((*new_node)->rb_right);
> - } else {
> - dev_warn(master->dev,
> - "stream %u already in tree\n",
> - cur_stream->id);
> - ret = -EINVAL;
> - break;
> - }
> - }
> - if (ret)
> + ret = xa_insert(&smmu->streams, sid, new_stream, GFP_KERNEL);
> + if (ret) {
> + if (ret == -EBUSY)
> + dev_warn(master->dev, "stream %u already binded\n",
Nit: "bound", however I think "in use" would be even better, to avoid
confusion with SVA terminology.
Thanks,
Robin.
[1]
https://lore.kernel.org/linux-iommu/ecb3725c-27c4-944b-b42c-f4e293521f94@arm.com/
> + sid);
> break;
> -
> - rb_link_node(&new_stream->node, parent_node, new_node);
> - rb_insert_color(&new_stream->node, &smmu->streams);
> + }
> }
>
> if (ret) {
> for (i--; i >= 0; i--)
> - rb_erase(&master->streams[i].node, &smmu->streams);
> + xa_erase(&smmu->streams, master->streams[i].id);
> kfree(master->streams);
> }
> mutex_unlock(&smmu->streams_mutex);
> @@ -2643,7 +2615,7 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master)
>
> mutex_lock(&smmu->streams_mutex);
> for (i = 0; i < fwspec->num_ids; i++)
> - rb_erase(&master->streams[i].node, &smmu->streams);
> + xa_erase(&smmu->streams, master->streams[i].id);
> mutex_unlock(&smmu->streams_mutex);
>
> kfree(master->streams);
> @@ -3097,7 +3069,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
> int ret;
>
> mutex_init(&smmu->streams_mutex);
> - smmu->streams = RB_ROOT;
> + xa_init(&smmu->streams);
>
> ret = arm_smmu_init_queues(smmu);
> if (ret)
> @@ -3913,6 +3885,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev)
> arm_smmu_device_disable(smmu);
> iopf_queue_free(smmu->evtq.iopf);
> ida_destroy(&smmu->vmid_map);
> + xa_destroy(&smmu->streams);
> }
>
> static void arm_smmu_device_shutdown(struct platform_device *pdev)
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 9915850dd4db..f500754d4fbe 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -680,14 +680,13 @@ struct arm_smmu_device {
> /* IOMMU core code handle */
> struct iommu_device iommu;
>
> - struct rb_root streams;
> + struct xarray streams;
> struct mutex streams_mutex;
> };
>
> struct arm_smmu_stream {
> u32 id;
> struct arm_smmu_master *master;
> - struct rb_node node;
> };
>
> /* SMMU private data for each master */