[v4,6/8] iommu/arm-smmu-v3: Move CD table to arm_smmu_master

Message ID 20230803003234.v4.6.Ice063dcf87d1b777a72e008d9e3406d2bcf6d876@changeid
State New
Headers
Series Refactor the SMMU's CD table ownership |

Commit Message

Michael Shavit Aug. 2, 2023, 4:32 p.m. UTC
  With this change, each master will now own its own CD table instead of
sharing one with other masters attached to the same domain. Attaching a
stage 1 domain installs CD entries into the master's CD table. SVA
writes its CD entries into each master's CD table if the domain is
shared across masters.

Signed-off-by: Michael Shavit <mshavit@google.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
---

Changes in v4:
- Added comment about the cd_table's dependency on the iommu core's
  group mutex.
- Narrowed the range of code for which the domain's init_mutex is held
  on attach since it now only protects the arm_smmu_domain_finalise
  call.

Changes in v2:
- Allocate CD table when it's first needed instead of on probe.

Changes in v1:
- The master's CD table allocation was previously split to a different
  commit. This change now atomically allocates the new CD table, uses
  it, and removes the old one.

 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 82 +++++++++------------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  7 +-
 2 files changed, 39 insertions(+), 50 deletions(-)
  

Comments

Michael Shavit Aug. 3, 2023, 5:56 p.m. UTC | #1
This patch introduces a subtle bug.

Previously, the arm-smmu-v3 driver could get away with skipping the
clearing of the CD entry on detach, since the table belonged to the
domain and wouldn't be re-written on re-attach. When we switch to the
master-owned table model, that CDTE in the master's table can get
written to with different CD domains. When the CD domain get's
switched to a new one without first being cleared, arm_smmu_write_ctx
will mis-interpret its call as an ASID update instead of an entirely
new Cd.

This bug was handled by clearing the CD entry on detach in the
"iommu/arm-smmu-v3: Refactor write_ctx_desc" commit before it was
split from the set_dev_pasid
series(https://lore.kernel.org/all/20230621063825.268890-5-mshavit@google.com/).
The change was lost when the commit moved to this series however. It's
splitting hairs a little, but that fix probably belongs in this patch
instead.
  
Jason Gunthorpe Aug. 3, 2023, 6:47 p.m. UTC | #2
On Fri, Aug 04, 2023 at 01:56:12AM +0800, Michael Shavit wrote:
> This patch introduces a subtle bug.
> 
> Previously, the arm-smmu-v3 driver could get away with skipping the
> clearing of the CD entry on detach, since the table belonged to the
> domain and wouldn't be re-written on re-attach. When we switch to the
> master-owned table model, that CDTE in the master's table can get
> written to with different CD domains. When the CD domain get's
> switched to a new one without first being cleared, arm_smmu_write_ctx
> will mis-interpret its call as an ASID update instead of an entirely
> new Cd.

I'm not surprised, I think arm_smmu_write_ctx is a little too clever
for its own good..

I would have written it by computing the full target CD entry,
extracted directly from the domain.

Something like:

struct cd_entry {
	__le64 val[4];
};

static void arm_smmu_get_domain_cd_value(struct arm_smmu_domain *domain,
					 struct arm_smmu_master *master,
					 bool quiet, struct cd_entry *entry)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct arm_smmu_ctx_desc *cd = &domain->cd;
	u64 val0;

	if (!domain) {
		memset(entry, 0, sizeof(*entry));
		return;
	}

	val0 = cd->tcr |
#ifdef __BIG_ENDIAN
	       CTXDESC_CD_0_ENDI |
#endif
	       CTXDESC_CD_0_R | CTXDESC_CD_0_A |
	       (cd->mm ? 0 : CTXDESC_CD_0_ASET) | CTXDESC_CD_0_AA64 |
	       FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | CTXDESC_CD_0_V;

	if (cd_table->stall_enabled)
		val0 |= CTXDESC_CD_0_S;

	if (quiet)
		val0 |= CTXDESC_CD_0_TCR_EPD0;

	entry->val[0] = cpu_to_le64(val0);
	entry->val[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
	entry->val[2] = 0;
	entry->val[3] = cpu_to_le64(cd->mair);
}

int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
			    struct arm_smmu_ctx_desc *cd)
{
	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
	struct cd_entry *cur_cd;
	struct cd_entry new_cd;

	if (WARN_ON(ssid >= (1 << cd_table->max_cds_bits)))
		return -E2BIG;

	new_cd = arm_smmu_get_cd_ptr(master, ssid);
	if (!new_cd)
		return -ENOMEM;

	arm_smmu_get_domain_cd_value(domain, master, cd == &quiet_cd, &new_cd);

	/*
	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
	 * "Configuration structures and configuration invalidation completion"
	 *
	 *   The size of single-copy atomic reads made by the SMMU is
	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
	 *   field within an aligned 64-bit span of a structure can be altered
	 *   without first making the structure invalid.
	 */

	/*
	 * Changing only dword 0 is common enough that we give it a fast path.
	 */
	if (cur_cd->val[1] != new_cd.val[1] ||
	    cur_cd->val[2] != new_cd.val[2] ||
	    cur_cd->val[3] != new_cd.val[3]) {
		/* Make it invalid so we can update all 4 values */
		if (le64_to_cpu(cur_cd->val[0]) & CTXDESC_CD_0_V) {
			if (le64_to_cpu(new_cd.val[0]) & CTXDESC_CD_0_V)
				WRITE_ONCE(cur_cd->val[0], 0);
			else
				WRITE_ONCE(cur_cd->val[0], new_cd.val[0]);
			arm_smmu_sync_cd(master, ssid, true);
		}

		cur_cd->val[1] = new_cd.val[1];
		cur_cd->val[2] = new_cd.val[2];
		cur_cd->val[3] = new_cd.val[3];

		/*
		 * CD entry may be live, and the SMMU might read dwords of this
		 * CD in any order. Ensure that it observes valid values before
		 * reading V=1.
		 */
		if (le64_to_cpu(new_cd.val[0]) & CTXDESC_CD_0_V)
			arm_smmu_sync_cd(master, ssid, true);
	}
	if (cur_cd->val[0] == new_cd.val[0])
		return 0;

	WRITE_ONCE(cur_cd->val[0], new_cd.val[0]);
	arm_smmu_sync_cd(master, ssid, true);
}

Jason
  
Nicolin Chen Aug. 4, 2023, 10:25 p.m. UTC | #3
On Thu, Aug 03, 2023 at 12:32:34AM +0800, Michael Shavit wrote:
> +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
>  {
>         int ret;
>         size_t l1size;
>         size_t max_contexts;
>         struct arm_smmu_device *smmu = master->smmu;
> -       struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
> +       struct arm_smmu_ctx_desc_cfg *cdcfg = &master->cd_table;
> 
>         cdcfg->stall_enabled = master->stall_enabled;

We have stall_enabled at both master->cd_table->stall_enabled
and master->stall_enabled, and we removed stall_enabled from
the CD structure...

> @@ -2436,22 +2419,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>         if (!smmu_domain->smmu) {
>                 smmu_domain->smmu = smmu;
>                 ret = arm_smmu_domain_finalise(domain, master);
> -               if (ret) {
> +               if (ret)
>                         smmu_domain->smmu = NULL;
> -                       goto out_unlock;
> -               }
> -       } else if (smmu_domain->smmu != smmu) {
> +       } else if (smmu_domain->smmu != smmu)
>                 ret = -EINVAL;
> -               goto out_unlock;
> -       } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
> -                  master->ssid_bits != smmu_domain->cd_table.max_cds_bits) {
> -               ret = -EINVAL;
> -               goto out_unlock;
> -       } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
> -                  smmu_domain->cd_table.stall_enabled != master->stall_enabled) {
> -               ret = -EINVAL;
> -               goto out_unlock;
> -       }

... then we remove this stall_enabled sanity also.

This means a shared domain (holding a shared CD) being inserted
to two CD tables from two masters would have two different CDTE
configurations at the stall bit.

If this is fine (I can't think of something wrong but not sure),
it would be okay here, though I feel we could mention this some-
where (maybe commit logs) since it changes the attach behavior?

Thanks
Nicolin
  
Jason Gunthorpe Aug. 4, 2023, 10:46 p.m. UTC | #4
On Fri, Aug 04, 2023 at 03:25:43PM -0700, Nicolin Chen wrote:
> > @@ -2436,22 +2419,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> >         if (!smmu_domain->smmu) {
> >                 smmu_domain->smmu = smmu;
> >                 ret = arm_smmu_domain_finalise(domain, master);
> > -               if (ret) {
> > +               if (ret)
> >                         smmu_domain->smmu = NULL;
> > -                       goto out_unlock;
> > -               }
> > -       } else if (smmu_domain->smmu != smmu) {
> > +       } else if (smmu_domain->smmu != smmu)
> >                 ret = -EINVAL;
> > -               goto out_unlock;
> > -       } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
> > -                  master->ssid_bits != smmu_domain->cd_table.max_cds_bits) {
> > -               ret = -EINVAL;
> > -               goto out_unlock;
> > -       } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
> > -                  smmu_domain->cd_table.stall_enabled != master->stall_enabled) {
> > -               ret = -EINVAL;
> > -               goto out_unlock;
> > -       }
> 
> ... then we remove this stall_enabled sanity also.
> 
> This means a shared domain (holding a shared CD) being inserted
> to two CD tables from two masters would have two different CDTE
> configurations at the stall bit.

I looked through the spec for a while and I thought this was fine..

Stall is basically a master specific behavior on how to operate page
faulting. It makes sense that it follows the master and the IOPTEs in
the domain can be used with both the faulting and non-faulting page
faulting path.

I would expect the page faulting path to figure out what to (if there
is anything special to do) do based on the master that triggered the
fault, not based on the domain that received it.

Jason
  
Nicolin Chen Aug. 4, 2023, 11:11 p.m. UTC | #5
On Fri, Aug 04, 2023 at 07:46:23PM -0300, Jason Gunthorpe wrote:
> On Fri, Aug 04, 2023 at 03:25:43PM -0700, Nicolin Chen wrote:
> > > @@ -2436,22 +2419,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> > >         if (!smmu_domain->smmu) {
> > >                 smmu_domain->smmu = smmu;
> > >                 ret = arm_smmu_domain_finalise(domain, master);
> > > -               if (ret) {
> > > +               if (ret)
> > >                         smmu_domain->smmu = NULL;
> > > -                       goto out_unlock;
> > > -               }
> > > -       } else if (smmu_domain->smmu != smmu) {
> > > +       } else if (smmu_domain->smmu != smmu)
> > >                 ret = -EINVAL;
> > > -               goto out_unlock;
> > > -       } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
> > > -                  master->ssid_bits != smmu_domain->cd_table.max_cds_bits) {
> > > -               ret = -EINVAL;
> > > -               goto out_unlock;
> > > -       } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
> > > -                  smmu_domain->cd_table.stall_enabled != master->stall_enabled) {
> > > -               ret = -EINVAL;
> > > -               goto out_unlock;
> > > -       }
> > 
> > ... then we remove this stall_enabled sanity also.
> > 
> > This means a shared domain (holding a shared CD) being inserted
> > to two CD tables from two masters would have two different CDTE
> > configurations at the stall bit.
> 
> I looked through the spec for a while and I thought this was fine..
> 
> Stall is basically a master specific behavior on how to operate page
> faulting. It makes sense that it follows the master and the IOPTEs in
> the domain can be used with both the faulting and non-faulting page
> faulting path.
>
> I would expect the page faulting path to figure out what to (if there
> is anything special to do) do based on the master that triggered the
> fault, not based on the domain that received it.

Yea, I went through the spec too yet didn't find anything that
could block us. And there is no SW dependency on the STALL bit
of the CDTE: actually it has an inverse relationship with the
S1STALLD bit in the STE, so following the STE/cd_table/master
makes sense. So long as a master has its own cd_table holding
its own CDTE for a shared domain, HW CD caching should be fine
as well.

With that being said, I think mentioning this behavior change
in the commit log wouldn't hurt. Someday people might want to
check this out in case something breaks.

Thanks
Nic
  
Michael Shavit Aug. 7, 2023, 12:19 p.m. UTC | #6
On Fri, Aug 4, 2023 at 2:47 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
>
>
> I'm not surprised, I think arm_smmu_write_ctx is a little too clever
> for its own good..
>
> I would have written it by computing the full target CD entry,
> extracted directly from the domain.
>

Yeah I was considering making a fix to arm_smmu_write_ctx instead; but
clearing the CD entry on detach feels like the right thing to do.
Relying on the 0th CD entry being re-written when the CD table is
re-inserted feels fragile.

Perhaps re-writing arm_smmu_write_ctx could be considered as a
separate singleton patch?
  
Jason Gunthorpe Aug. 7, 2023, 10:41 p.m. UTC | #7
On Mon, Aug 07, 2023 at 08:19:44PM +0800, Michael Shavit wrote:
> On Fri, Aug 4, 2023 at 2:47 AM Jason Gunthorpe <jgg@nvidia.com> wrote:
> >
> >
> > I'm not surprised, I think arm_smmu_write_ctx is a little too clever
> > for its own good..
> >
> > I would have written it by computing the full target CD entry,
> > extracted directly from the domain.
> >
> 
> Yeah I was considering making a fix to arm_smmu_write_ctx instead; but
> clearing the CD entry on detach feels like the right thing to do.
> Relying on the 0th CD entry being re-written when the CD table is
> re-inserted feels fragile.
> 
> Perhaps re-writing arm_smmu_write_ctx could be considered as a
> separate singleton patch?

I wouldn't touch it in this series at least

Jason
  

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 34bd7815aeb8..e2c33024ad85 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1025,7 +1025,7 @@  static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
 	unsigned int idx;
 	struct arm_smmu_l1_ctx_desc *l1_desc;
 	struct arm_smmu_device *smmu = master->smmu;
-	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->domain->cd_table;
+	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->cd_table;
 
 	if (!cdcfg->l1_desc)
 		return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
@@ -1062,7 +1062,7 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 	u64 val;
 	bool cd_live;
 	__le64 *cdptr;
-	struct arm_smmu_ctx_desc_cfg *cd_table = &master->domain->cd_table;
+	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
 
 	if (WARN_ON(ssid >= (1 << cd_table->max_cds_bits)))
 		return -E2BIG;
@@ -1125,14 +1125,13 @@  int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
 	return 0;
 }
 
-static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
-				    struct arm_smmu_master *master)
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
 {
 	int ret;
 	size_t l1size;
 	size_t max_contexts;
 	struct arm_smmu_device *smmu = master->smmu;
-	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
+	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->cd_table;
 
 	cdcfg->stall_enabled = master->stall_enabled;
 	cdcfg->max_cds_bits = master->ssid_bits;
@@ -1174,12 +1173,12 @@  static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain,
 	return ret;
 }
 
-static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
+static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
 {
 	int i;
 	size_t size, l1size;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->cd_table;
+	struct arm_smmu_device *smmu = master->smmu;
+	struct arm_smmu_ctx_desc_cfg *cdcfg = &master->cd_table;
 
 	if (cdcfg->l1_desc) {
 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
@@ -1287,7 +1286,7 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 	if (smmu_domain) {
 		switch (smmu_domain->stage) {
 		case ARM_SMMU_DOMAIN_S1:
-			cd_table = &smmu_domain->cd_table;
+			cd_table = &master->cd_table;
 			break;
 		case ARM_SMMU_DOMAIN_S2:
 		case ARM_SMMU_DOMAIN_NESTED:
@@ -2077,14 +2076,10 @@  static void arm_smmu_domain_free(struct iommu_domain *domain)
 
 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
 
-	/* Free the CD and ASID, if we allocated them */
+	/* Free the ASID or VMID */
 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-		struct arm_smmu_ctx_desc_cfg *cd_table = &smmu_domain->cd_table;
-
 		/* Prevent SVA from touching the CD while we're freeing it */
 		mutex_lock(&arm_smmu_asid_lock);
-		if (cd_table->cdtab)
-			arm_smmu_free_cd_tables(smmu_domain);
 		arm_smmu_free_asid(&smmu_domain->cd);
 		mutex_unlock(&arm_smmu_asid_lock);
 	} else {
@@ -2096,7 +2091,7 @@  static void arm_smmu_domain_free(struct iommu_domain *domain)
 	kfree(smmu_domain);
 }
 
-static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
+static int arm_smmu_domain_finalise_cd(struct arm_smmu_domain *smmu_domain,
 				       struct arm_smmu_master *master,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
@@ -2115,10 +2110,6 @@  static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 	if (ret)
 		goto out_unlock;
 
-	ret = arm_smmu_alloc_cd_tables(smmu_domain, master);
-	if (ret)
-		goto out_free_asid;
-
 	cd->asid	= (u16)asid;
 	cd->ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
 	cd->tcr		= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
@@ -2130,17 +2121,9 @@  static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
 	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
 
-	ret = arm_smmu_write_ctx_desc(master, 0, cd);
-	if (ret)
-		goto out_free_cd_tables;
-
 	mutex_unlock(&arm_smmu_asid_lock);
 	return 0;
 
-out_free_cd_tables:
-	arm_smmu_free_cd_tables(smmu_domain);
-out_free_asid:
-	arm_smmu_free_asid(cd);
 out_unlock:
 	mutex_unlock(&arm_smmu_asid_lock);
 	return ret;
@@ -2203,7 +2186,7 @@  static int arm_smmu_domain_finalise(struct iommu_domain *domain,
 		ias = min_t(unsigned long, ias, VA_BITS);
 		oas = smmu->ias;
 		fmt = ARM_64_LPAE_S1;
-		finalise_stage_fn = arm_smmu_domain_finalise_s1;
+		finalise_stage_fn = arm_smmu_domain_finalise_cd;
 		break;
 	case ARM_SMMU_DOMAIN_NESTED:
 	case ARM_SMMU_DOMAIN_S2:
@@ -2436,22 +2419,14 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	if (!smmu_domain->smmu) {
 		smmu_domain->smmu = smmu;
 		ret = arm_smmu_domain_finalise(domain, master);
-		if (ret) {
+		if (ret)
 			smmu_domain->smmu = NULL;
-			goto out_unlock;
-		}
-	} else if (smmu_domain->smmu != smmu) {
+	} else if (smmu_domain->smmu != smmu)
 		ret = -EINVAL;
-		goto out_unlock;
-	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
-		   master->ssid_bits != smmu_domain->cd_table.max_cds_bits) {
-		ret = -EINVAL;
-		goto out_unlock;
-	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
-		   smmu_domain->cd_table.stall_enabled != master->stall_enabled) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
+
+	mutex_unlock(&smmu_domain->init_mutex);
+	if (ret)
+		return ret;
 
 	master->domain = smmu_domain;
 
@@ -2465,6 +2440,22 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
 		master->ats_enabled = arm_smmu_ats_supported(master);
 
+	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+		if (!master->cd_table.cdtab) {
+			ret = arm_smmu_alloc_cd_tables(master);
+			if (ret) {
+				master->domain = NULL;
+				return ret;
+			}
+		}
+
+		ret = arm_smmu_write_ctx_desc(master, 0, &smmu_domain->cd);
+		if (ret) {
+			master->domain = NULL;
+			return ret;
+		}
+	}
+
 	arm_smmu_install_ste_for_dev(master);
 
 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
@@ -2472,10 +2463,7 @@  static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
 	arm_smmu_enable_ats(master);
-
-out_unlock:
-	mutex_unlock(&smmu_domain->init_mutex);
-	return ret;
+	return 0;
 }
 
 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
@@ -2719,6 +2707,8 @@  static void arm_smmu_release_device(struct device *dev)
 	arm_smmu_detach_dev(master);
 	arm_smmu_disable_pasid(master);
 	arm_smmu_remove_master(master);
+	if (master->cd_table.cdtab_dma)
+		arm_smmu_free_cd_tables(master);
 	kfree(master);
 }
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 6066a09c0199..1f3b37025777 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -694,6 +694,8 @@  struct arm_smmu_master {
 	struct arm_smmu_domain		*domain;
 	struct list_head		domain_head;
 	struct arm_smmu_stream		*streams;
+	/* Locked by the iommu core using the group mutex */
+	struct arm_smmu_ctx_desc_cfg	cd_table;
 	unsigned int			num_streams;
 	bool				ats_enabled;
 	bool				stall_enabled;
@@ -720,11 +722,8 @@  struct arm_smmu_domain {
 
 	enum arm_smmu_domain_stage	stage;
 	union {
-		struct {
 		struct arm_smmu_ctx_desc	cd;
-		struct arm_smmu_ctx_desc_cfg	cd_table;
-		};
-		struct arm_smmu_s2_cfg	s2_cfg;
+		struct arm_smmu_s2_cfg		s2_cfg;
 	};
 
 	struct iommu_domain		domain;