[v3,6/8] iommu/arm-smmu-v3: Move CD table to arm_smmu_master

Message ID 20230802023524.v3.6.Ice063dcf87d1b777a72e008d9e3406d2bcf6d876@changeid
State: New
Series: Refactor the SMMU's CD table ownership

Commit Message

Michael Shavit Aug. 1, 2023, 6:35 p.m. UTC
With this change, each master will now own its own CD table instead of
sharing one with other masters attached to the same domain. Attaching a
stage 1 domain installs CD entries into the master's CD table. SVA
writes its CD entries into each master's CD table if the domain is
shared across masters.
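
Conceptually, the ownership moves like this (an abridged sketch of the
header change below; unrelated fields omitted):

	/* Before: one CD table per domain, shared by every attached master */
	struct arm_smmu_domain {
		struct arm_smmu_ctx_desc	cd;
		struct arm_smmu_ctx_desc_cfg	cd_table;	/* goes away */
	};

	/* After: one CD table per master, allocated lazily on the first
	 * stage 1 attach and freed in arm_smmu_release_device().
	 */
	struct arm_smmu_master {
		struct arm_smmu_ctx_desc_cfg	cd_table;	/* new */
	};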

Signed-off-by: Michael Shavit <mshavit@google.com>
---

(no changes since v2)

Changes in v2:
- Allocate CD table when it's first needed instead of on probe.

Changes in v1:
- The master's CD table allocation was previously split into a different
  commit. This change now atomically allocates the new CD table, uses
  it, and removes the old one.

 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 65 +++++++++------------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  6 +-
 2 files changed, 31 insertions(+), 40 deletions(-)
  

Comments

Jason Gunthorpe Aug. 1, 2023, 11:53 p.m. UTC | #1
On Wed, Aug 02, 2023 at 02:35:23AM +0800, Michael Shavit wrote:
> @@ -2465,6 +2440,22 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
>  		master->ats_enabled = arm_smmu_ats_supported(master);
>  
> +	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
> +		if (!master->cd_table.cdtab) {
> +			ret = arm_smmu_alloc_cd_tables(master);
> +			if (ret) {

Again, I didn't look very closely at your locking, but what lock is
being held to protect the read of master->cd_table.cdtab?

> +				master->domain = NULL;
> +				goto out_unlock;

This is only the domain lock:
	mutex_unlock(&smmu_domain->init_mutex);

Which is no longer sufficient.

You need some kind of lock in the master struct to protect the shared
cd table and everything in it?

Jason
  
Michael Shavit Aug. 2, 2023, 11:19 a.m. UTC | #2
On Wed, Aug 2, 2023 at 7:53 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
> On Wed, Aug 02, 2023 at 02:35:23AM +0800, Michael Shavit wrote:
> > @@ -2465,6 +2440,22 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> >       if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
> >               master->ats_enabled = arm_smmu_ats_supported(master);
> >
> > +     if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
> > +             if (!master->cd_table.cdtab) {
> > +                     ret = arm_smmu_alloc_cd_tables(master);
> > +                     if (ret) {
>
> Again, I didn't look very closely at your locking, but what lock is
> being held to protect the read of master->cd_table.cdtab?

The cd_table is only written into (with write_ctx_desc) when something
attaches or detaches (SVA is a little weird, but it handles locking
internally, and blocks all non-sva attach/detach calls while enabled).
The cd_table itself is allocated on first attach, and freed on release.

Doesn't the iommu framework guarantee that attach_dev (and
release_device) won't have concurrent calls for a given master through
the group lock? I can add an internal lock if relying on the iommu
lock is not OK.
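
For reference, my understanding of that guarantee, sketched from
drivers/iommu/iommu.c (simplified; not the exact core code):

	int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
	{
		struct iommu_group *group = dev->iommu_group;
		int ret;

		/* One attach/detach at a time per group, hence per master */
		mutex_lock(&group->mutex);
		ret = __iommu_attach_group(domain, group); /* -> ops->attach_dev() */
		mutex_unlock(&group->mutex);
		return ret;
	}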

> > +                             master->domain = NULL;
> > +                             goto out_unlock;
>
> This is only the domain lock:
>         mutex_unlock(&smmu_domain->init_mutex);
>
> Which is no longer sufficient.

Hmmm, yeah, that lock looks misleading here. Let me move the unlock
further up so that it more clearly surrounds the section it protects.
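
Roughly like so (a sketch only; to be worked out in the next version):

	mutex_lock(&smmu_domain->init_mutex);
	if (!smmu_domain->smmu) {
		smmu_domain->smmu = smmu;
		ret = arm_smmu_domain_finalise(domain, master);
	} else if (smmu_domain->smmu != smmu) {
		ret = -EINVAL;
	}
	/* init_mutex only guards domain finalisation, so drop it here */
	mutex_unlock(&smmu_domain->init_mutex);
	if (ret)
		return ret;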
  
Jason Gunthorpe Aug. 2, 2023, 11:51 a.m. UTC | #3
On Wed, Aug 02, 2023 at 07:19:12PM +0800, Michael Shavit wrote:
> On Wed, Aug 2, 2023 at 7:53 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> > On Wed, Aug 02, 2023 at 02:35:23AM +0800, Michael Shavit wrote:
> > > @@ -2465,6 +2440,22 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> > >       if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
> > >               master->ats_enabled = arm_smmu_ats_supported(master);
> > >
> > > +     if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
> > > +             if (!master->cd_table.cdtab) {
> > > +                     ret = arm_smmu_alloc_cd_tables(master);
> > > +                     if (ret) {
> >
> > Again, I didn't look very closely at your locking, but what lock is
> > being held to protect the read of master->cd_table.cdtab?
> 
> The cd_table is only written into (with write_ctx_desc) when something
> attaches or detaches (SVA is a little weird, but it handles locking
> internally, and blocks all non-sva attach/detach calls while enabled).
> The cd_table itself is allocated on first attach, and freed on release.
> 
> Doesn't the iommu framework guarantee that attach_dev (and
> release_device) won't have concurrent calls for a given master through
> the group lock? I can add an internal lock if relying on the iommu
> lock is not OK.

Yes that is right.

So, a comment about that in the struct around those variables would be
helpful (locked by the iommu core using the group mutex).
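
Something like this in arm-smmu-v3.h, say (exact wording is your
call):

	struct arm_smmu_master {
		...
		/* Locked by the iommu core using the group mutex */
		struct arm_smmu_ctx_desc_cfg	cd_table;
		...
	};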

But the code is fine.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
  

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 34bd7815aeb8e..b05963cd4e5b5 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1025,7 +1025,7 @@ static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
 	unsigned int idx;
 	struct arm_smmu_l1_ctx_desc *l1_desc;
 	struct arm_smmu_device *smmu = master->smmu;
-	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->domain->cd_table;
+	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->cd_table;
 
 	if (!cdcfg->l1_desc)
 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
@@ -1062,7 +1062,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 	u64 val;
 	bool cd_live;
 	__le64 *cdptr;
-	struct arm_smmu_ctx_desc_cfg *cd_table = &master->domain->cd_table;
+	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
 
 	if (WARN_ON(ssid >= (1 << cd_table->max_cds_bits)))
 		return -E2BIG;
@@ -1125,14 +1125,13 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 	return 0;
 }
 
-static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
-				    struct arm_smmu_master *master)
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
 {
 	int ret;
 	size_t l1size;
 	size_t max_contexts;
 	struct arm_smmu_device *smmu = master->smmu;
-	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
+	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->cd_table;
 
 	cdcfg->stall_enabled = master->stall_enabled;
 	cdcfg->max_cds_bits = master->ssid_bits;
@@ -1174,12 +1173,12 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
 	return ret;
 }
 
-static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
+static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
 {
 	int i;
 	size_t size, l1size;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
+	struct arm_smmu_device *smmu = master->smmu;
+	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->cd_table;
 
 	if (cdcfg->l1_desc) {
 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
@@ -1287,7 +1286,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 	if (smmu_domain) {
 		switch (smmu_domain->stage) {
 		case ARM_SMMU_DOMAIN_S1:
-			cd_table = &smmu_domain->cd_table;
+			cd_table = &master->cd_table;
 			break;
 		case ARM_SMMU_DOMAIN_S2:
 		case ARM_SMMU_DOMAIN_NESTED:
@@ -2077,14 +2076,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 
 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 
-	/* Free the CD and ASID, if we allocated them */
+	/* Free the ASID or VMID */
 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-		struct arm_smmu_ctx_desc_cfg *cd_table = &smmu_domain->cd_table;
-
 		/* Prevent SVA from touching the CD while we're freeing it */
 		mutex_lock(&arm_smmu_asid_lock);
-		if (cd_table->cdtab)
-			arm_smmu_free_cd_tables(smmu_domain);
 		arm_smmu_free_asid(&smmu_domain->cd);
 		mutex_unlock(&arm_smmu_asid_lock);
 	} else {
@@ -2096,7 +2091,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 	kfree(smmu_domain);
 }
 
-static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
+static int arm_smmu_domain_finalise_cd(struct arm_smmu_domain *smmu_domain,
 				       struct arm_smmu_master *master,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
@@ -2115,10 +2110,6 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 	if (ret)
 		goto out_unlock;
 
-	ret = arm_smmu_alloc_cd_tables(smmu_domain, master);
-	if (ret)
-		goto out_free_asid;
-
 	cd->asid	= (u16)asid;
 	cd->ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
 	cd->tcr		= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
@@ -2130,17 +2121,9 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
 	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
 
-	ret = arm_smmu_write_ctx_desc(master, 0, cd);
-	if (ret)
-		goto out_free_cd_tables;
-
 	mutex_unlock(&arm_smmu_asid_lock);
 	return 0;
 
-out_free_cd_tables:
-	arm_smmu_free_cd_tables(smmu_domain);
-out_free_asid:
-	arm_smmu_free_asid(cd);
 out_unlock:
 	mutex_unlock(&arm_smmu_asid_lock);
 	return ret;
@@ -2203,7 +2186,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
 		ias = min_t(unsigned long, ias, VA_BITS);
 		oas = smmu->ias;
 		fmt = ARM_64_LPAE_S1;
-		finalise_stage_fn = arm_smmu_domain_finalise_s1;
+		finalise_stage_fn = arm_smmu_domain_finalise_cd;
 		break;
 	case ARM_SMMU_DOMAIN_NESTED:
 	case ARM_SMMU_DOMAIN_S2:
@@ -2443,14 +2426,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	} else if (smmu_domain->smmu != smmu) {
 		ret = -EINVAL;
 		goto out_unlock;
-	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
-		   master->ssid_bits != smmu_domain->cd_table.max_cds_bits) {
-		ret = -EINVAL;
-		goto out_unlock;
-	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
-		   smmu_domain->cd_table.stall_enabled != master->stall_enabled) {
-		ret = -EINVAL;
-		goto out_unlock;
 	}
 
 	master->domain = smmu_domain;
@@ -2465,6 +2440,22 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
 		master->ats_enabled = arm_smmu_ats_supported(master);
 
+	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+		if (!master->cd_table.cdtab) {
+			ret = arm_smmu_alloc_cd_tables(master);
+			if (ret) {
+				master->domain = NULL;
+				goto out_unlock;
+			}
+		}
+
+		ret = arm_smmu_write_ctx_desc(master, 0, &smmu_domain->cd);
+		if (ret) {
+			master->domain = NULL;
+			goto out_unlock;
+		}
+	}
+
 	arm_smmu_install_ste_for_dev(master);
 
 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
@@ -2719,6 +2710,8 @@ static void arm_smmu_release_device(struct device *dev)
 	arm_smmu_detach_dev(master);
 	arm_smmu_disable_pasid(master);
 	arm_smmu_remove_master(master);
+	if (master->cd_table.cdtab_dma)
+		arm_smmu_free_cd_tables(master);
 	kfree(master);
 }
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 6066a09c01996..f2acfcc1af925 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -694,6 +694,7 @@ struct arm_smmu_master {
 	struct arm_smmu_domain		*domain;
 	struct list_head		domain_head;
 	struct arm_smmu_stream		*streams;
+	struct arm_smmu_ctx_desc_cfg	cd_table;
 	unsigned int			num_streams;
 	bool				ats_enabled;
 	bool				stall_enabled;
@@ -720,11 +721,8 @@ struct arm_smmu_domain {
 
 	enum arm_smmu_domain_stage	stage;
 	union {
-		struct {
 		struct arm_smmu_ctx_desc	cd;
-		struct arm_smmu_ctx_desc_cfg	cd_table;
-		};
-		struct arm_smmu_s2_cfg	s2_cfg;
+		struct arm_smmu_s2_cfg		s2_cfg;
 	};
 
 	struct iommu_domain		domain;