[v4,2/2] iommu/arm-smmu-v3: Refactor arm_smmu_write_strtab_ent()

Message ID 6e1fdea8ab43ea28e7e3c79eb6605dea71548c53.1695242337.git.nicolinc@nvidia.com
State New
Headers
Series iommu/arm-smmu-v3: Allow default substream bypass with a pasid support |

Commit Message

Nicolin Chen Sept. 20, 2023, 8:52 p.m. UTC
  A stream table entry generally can be configured for the following cases:
Case #1: STE Stage-1 Translate Only
  The master has a CD table and attached to an S1 or BYPASS domain.
[Config #1] Set STE.Config to S1_TRANS. And set STE.SHCFG to INCOMING,
	    required by a BYPASS domain and ignored by an S1 domain.
	    Then follow the CD table to set the other fields.

Case #2: STE Stage-2 Translate Only
  The master doesn't have a CD table and attached to an S2 domain.
[Config #2] Set STE.Config to S2_TRANS. Then follow the s2_cfg to set the
            other fields.

Case #3: STE Stage-1 and Stage-2 Translate
  The master allocated a CD table and attached to a NESTED domain that has
  an s2_cfg somewhere for stage-2 fields.
[Config #4] Set STE.Config to S1_TRANS | S2_TRANS. Then follow both the CD
            table and the s2_cfg to set the other fields.

Case #4: STE Bypass
  The master doesn't have a CD table and attached to an INDENTITY domain.
[Config #3] Set STE.Config to BYPASS and set STE.SHCFG to INCOMING.

Case #5: STE Abort
  The master is not attached to any domain, and the "disable_bypass" param
  is set to "true".
[Config #4] Set STE.Config to ABORT

After the recent refactor of moving cd/cd_table ownerships, things in the
arm_smmu_write_strtab_ent() are a bit out of date, e.g. master pointer now
is always available. And it doesn't support a special case of attaching a
BYPASS domain to a multi-ssid master in the case #1.

Add helpers by naming them clearly for the first four STE.Config settings.

The case #5 can be covered by calling Config #2 at the end of Config #1,
though the driver currently doesn't really use it and should be updated to
the ongoing nesting design in the IOMMUFD. Yet, the helpers would be able
to simply support that with very limited changes in the furture.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 197 +++++++++++---------
 1 file changed, 110 insertions(+), 87 deletions(-)
  

Comments

Jason Gunthorpe Sept. 25, 2023, 6:35 p.m. UTC | #1
On Wed, Sep 20, 2023 at 01:52:04PM -0700, Nicolin Chen wrote:
>  
> +static void arm_smmu_ste_stage2_translate(struct arm_smmu_master *master,
> +					  u64 *ste)
> +{
> +	struct arm_smmu_domain *smmu_domain = master->domain;
> +	struct arm_smmu_device *smmu = master->smmu;
> +	struct arm_smmu_s2_cfg *s2_cfg;
> +
> +	switch (smmu_domain->stage) {
> +	case ARM_SMMU_DOMAIN_NESTED:
> +	case ARM_SMMU_DOMAIN_S2:
> +		s2_cfg = &smmu_domain->s2_cfg;
> +		break;
> +	default:
> +		WARN_ON(1);
> +		return;
> +	}
> +
> +	ste[0] |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
> +
> +	if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
> +		ste[1] |= STRTAB_STE_1_S1STALLD;
> +
> +	if (master->ats_enabled)
> +		ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);

These master bits probably belong in their own function 'setup ste for master'

The s1 and s2 cases are duplicating these things.

> +
> +	ste[2] |= FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
> +		  FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
> +#ifdef __BIG_ENDIAN
> +		  STRTAB_STE_2_S2ENDI |
> +#endif
> +		  STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R;
> +
> +	ste[3] |= s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK;
> +}
> +
> +static void arm_smmu_ste_stage1_translate(struct arm_smmu_master *master,
> +					  u64 *ste)
> +{

Lets stop calling the cdtable 'stage 1' please, it is confusing.

arm_smmu_ste_cdtable()

> +	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> +	struct arm_smmu_device *smmu = master->smmu;
> +	__le64 *cdptr = arm_smmu_get_cd_ptr(master, 0);
> +
> +	WARN_ON_ONCE(!cdptr);
> +
> +	ste[0] |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> +		  FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> +		  FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
> +		  FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
> +
> +	if (FIELD_GET(CTXDESC_CD_0_ASID, le64_to_cpu(cdptr[0])))

Reading the CD like that seems like a hacky way to detect that the RID
domain is bypass, just do it directly:

if (master->domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		ste[1] |= FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_BYPASS);
else
		ste[1] |= FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0);

> +	ste[1] |= FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING) |
> +		  FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> +		  FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> +		  FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
> +
> +	if (smmu->features & ARM_SMMU_FEAT_E2H)
> +		ste[1] |= FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2);
> +	else
> +		ste[1] |= FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1);
> +
> +	if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
> +		ste[1] |= STRTAB_STE_1_S1STALLD;
> +
> +	if (master->ats_enabled)
> +		ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);
> +
> +	if (master->domain->stage == ARM_SMMU_DOMAIN_NESTED)
> +		arm_smmu_ste_stage2_translate(master, ste);

I think this needs a comment

/*
 * SMMUv3 does not support using a S2 domain and a CD table for anything 
 * other than nesting where the S2 is the translation for the CD
 * table, and all associated S1s.
 */

> +	if (le64_to_cpu(dst[0]) & STRTAB_STE_0_V) {
> +		switch (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(dst[0]))) {
>  		case STRTAB_STE_0_CFG_BYPASS:
>  			break;
>  		case STRTAB_STE_0_CFG_S1_TRANS:

This thing could go into a function 'ste_is_live' too

> +	ste[0] = STRTAB_STE_0_V;
>  
> +	if (master->cd_table.cdtab && master->domain) {

I think the weirdness in arm_smmu_detach_dev() causes trouble here?
Despite the name the function is some sort of preperation to
attach_dev.

So if we change attachments while a cdtab is active we should not
remove the cdtab.

Basically, no master->domain check..

IMHO, I still don't like how this is structured. We have
arm_smmu_detach_dev() which really just wants to invalidate the STE.
Now that you shifted some of the logic to functions this might be
better overall:

static void arm_smmu_store_ste(struct arm_smmu_master *master,
				      __le64 *dst, u64 *src)
{
	bool ste_sync_all = false;

	for (i = 1; i < 4; i++) {
		if (dst[i] == cpu_to_le64(ste[i]))
			continue;
		dst[i] = cpu_to_le64(ste[i]);
		ste_sync_all = true;
	}

	if (ste_sync_all)
		arm_smmu_sync_ste_for_sid(smmu, sid);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(dst[0], cpu_to_le64(ste[0]));
	arm_smmu_sync_ste_for_sid(smmu, sid);
}

static void arm_smmu_clear_strtab_ent(struct arm_smmu_master *master,
				      __le64 *dst)
{
	u64 ste[4] = {};

	ste[0] = STRTAB_STE_0_V;
	if (disable_bypass)
		arm_smmu_ste_abort(ste);
	else
		arm_smmu_ste_bypass(ste);
	arm_smmu_store_ste(master, dst, &ste);
}

And use clear_strtab_ent from detach ??

(but then I wonder why not set V=0 instead of STE.Config = abort?)

> +		arm_smmu_ste_stage1_translate(master, ste);
> +	} else if (master->domain &&
> +		   master->domain->stage == ARM_SMMU_DOMAIN_S2) {
>  		BUG_ON(ste_live);
> +		arm_smmu_ste_stage2_translate(master, ste);

This whole bit looks nicer as one if

} else if (master->domain) {
       	   if (master->domain->stage == ARM_SMMU_DOMAIN_S2)
		arm_smmu_ste_stage2_translate(master, ste);
	   else if (master->domain->domain.type == IOMMU_DOMAIN_IDENTITY)
		arm_smmu_ste_bypass(ste);
	   else
		BUG_ON()
} else {
    // Ugh, removing this case requires more work
}

(Linus will not like the bug_on's btw, the function really should
fail)

> +	for (i = 1; i < 4; i++) {
> +		if (dst[i] == cpu_to_le64(ste[i]))
> +			continue;
> +		dst[i] = cpu_to_le64(ste[i]);
> +		ste_sync_all = true;
> +	}

This isn't going to work if the transition is from a fully valid STE
to an invalid one, it will corrupt the still in-use bytes.

Though current code does this:

		dst[0] = cpu_to_le64(val);
		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
						STRTAB_STE_1_SHCFG_INCOMING));
		dst[2] = 0; /* Nuke the VMID */

Which I don't really understand either? Why is it OK to wipe the VMID
out of order with the STE.Config change?

Be sure to read the part of the SMMU spec talking about how to update
these things, 3.21.3.1 Configuration structure update procedure and
nearby.

Regardless there are clearly two orders in the existing code

Write 0,1,2,flush (translation -> bypass/fault)

Write 3,2,1,flush,0,flush (bypass/fault -> translation)

You still have to preserve both behaviors.

(interestingly neither seem to follow the guidance of the ARM manual,
so huh)

Still, I think this should be able to become more robust in general..
You have a current and target STE and you just need to figure out what
order to write the bits and if a V=0 transition is needed.

The bigger question is does this have to be more generic to handle
S1DSS which is it's original design goal?

Jason
  
Nicolin Chen Sept. 25, 2023, 8:03 p.m. UTC | #2
On Mon, Sep 25, 2023 at 03:35:23PM -0300, Jason Gunthorpe wrote:
> On Wed, Sep 20, 2023 at 01:52:04PM -0700, Nicolin Chen wrote:
> >  
> > +static void arm_smmu_ste_stage2_translate(struct arm_smmu_master *master,
> > +					  u64 *ste)
> > +{
> > +	struct arm_smmu_domain *smmu_domain = master->domain;
> > +	struct arm_smmu_device *smmu = master->smmu;
> > +	struct arm_smmu_s2_cfg *s2_cfg;
> > +
> > +	switch (smmu_domain->stage) {
> > +	case ARM_SMMU_DOMAIN_NESTED:
> > +	case ARM_SMMU_DOMAIN_S2:
> > +		s2_cfg = &smmu_domain->s2_cfg;
> > +		break;
> > +	default:
> > +		WARN_ON(1);
> > +		return;
> > +	}
> > +
> > +	ste[0] |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
> > +
> > +	if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
> > +		ste[1] |= STRTAB_STE_1_S1STALLD;
> > +
> > +	if (master->ats_enabled)
> > +		ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);
> 
> These master bits probably belong in their own function 'setup ste for master'
> 
> The s1 and s2 cases are duplicating these things.

OK. I thought that writing these helpers in form of STE.Config
field configurations could be more straightforward despite some
duplications.

> > +
> > +	ste[2] |= FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
> > +		  FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
> > +#ifdef __BIG_ENDIAN
> > +		  STRTAB_STE_2_S2ENDI |
> > +#endif
> > +		  STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R;
> > +
> > +	ste[3] |= s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK;
> > +}
> > +
> > +static void arm_smmu_ste_stage1_translate(struct arm_smmu_master *master,
> > +					  u64 *ste)
> > +{
> 
> Lets stop calling the cdtable 'stage 1' please, it is confusing.
> 
> arm_smmu_ste_cdtable()

Well, this follows the STE.Config field value namings in the
spec. I can change if you don't like the terms in the spec..

> > +	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> > +	struct arm_smmu_device *smmu = master->smmu;
> > +	__le64 *cdptr = arm_smmu_get_cd_ptr(master, 0);
> > +
> > +	WARN_ON_ONCE(!cdptr);
> > +
> > +	ste[0] |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
> > +		  FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
> > +		  FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
> > +		  FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
> > +
> > +	if (FIELD_GET(CTXDESC_CD_0_ASID, le64_to_cpu(cdptr[0])))
> 
> Reading the CD like that seems like a hacky way to detect that the RID
> domain is bypass, just do it directly:
> 
> if (master->domain->stage == ARM_SMMU_DOMAIN_BYPASS)
> 		ste[1] |= FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_BYPASS);
> else
> 		ste[1] |= FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0);

OK, that'd make this function depend on "domain" also v.s. on
cd_table alone though.

> > +	ste[1] |= FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING) |
> > +		  FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > +		  FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
> > +		  FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
> > +
> > +	if (smmu->features & ARM_SMMU_FEAT_E2H)
> > +		ste[1] |= FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2);
> > +	else
> > +		ste[1] |= FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1);
> > +
> > +	if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
> > +		ste[1] |= STRTAB_STE_1_S1STALLD;
> > +
> > +	if (master->ats_enabled)
> > +		ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);
> > +
> > +	if (master->domain->stage == ARM_SMMU_DOMAIN_NESTED)
> > +		arm_smmu_ste_stage2_translate(master, ste);
> 
> I think this needs a comment
> 
> /*
>  * SMMUv3 does not support using a S2 domain and a CD table for anything 
>  * other than nesting where the S2 is the translation for the CD
>  * table, and all associated S1s.
>  */

OK.

> > +	if (le64_to_cpu(dst[0]) & STRTAB_STE_0_V) {
> > +		switch (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(dst[0]))) {
> >  		case STRTAB_STE_0_CFG_BYPASS:
> >  			break;
> >  		case STRTAB_STE_0_CFG_S1_TRANS:
> 
> This thing could go into a function 'ste_is_live' too

Ack.

> > +	ste[0] = STRTAB_STE_0_V;
> >  
> > +	if (master->cd_table.cdtab && master->domain) {
> 
> I think the weirdness in arm_smmu_detach_dev() causes trouble here?
> Despite the name the function is some sort of preperation to
> attach_dev.

Yea.

> So if we change attachments while a cdtab is active we should not
> remove the cdtab.
> 
> Basically, no master->domain check..

OK. Got your point.

> IMHO, I still don't like how this is structured. We have
> arm_smmu_detach_dev() which really just wants to invalidate the STE.
> Now that you shifted some of the logic to functions this might be
> better overall:
> 
> static void arm_smmu_store_ste(struct arm_smmu_master *master,
> 				      __le64 *dst, u64 *src)
> {
> 	bool ste_sync_all = false;
> 
> 	for (i = 1; i < 4; i++) {
> 		if (dst[i] == cpu_to_le64(ste[i]))
> 			continue;
> 		dst[i] = cpu_to_le64(ste[i]);
> 		ste_sync_all = true;
> 	}
> 
> 	if (ste_sync_all)
> 		arm_smmu_sync_ste_for_sid(smmu, sid);
> 	/* See comment in arm_smmu_write_ctx_desc() */
> 	WRITE_ONCE(dst[0], cpu_to_le64(ste[0]));
> 	arm_smmu_sync_ste_for_sid(smmu, sid);
> }
> 
> static void arm_smmu_clear_strtab_ent(struct arm_smmu_master *master,
> 				      __le64 *dst)
> {
> 	u64 ste[4] = {};
> 
> 	ste[0] = STRTAB_STE_0_V;
> 	if (disable_bypass)
> 		arm_smmu_ste_abort(ste);
> 	else
> 		arm_smmu_ste_bypass(ste);
> 	arm_smmu_store_ste(master, dst, &ste);
> }
> 
> And use clear_strtab_ent from detach ??

We still need bypass() in arm_smmu_write_strtab_ent(). But this
removes the abort() call and its confusing if-condition, I see.

> (but then I wonder why not set V=0 instead of STE.Config = abort?)

It seems that the difference is whether there would be or not a
C_BAD_STE event, i.e. "STE.Config = abort" is a silent way for
a disabled/disconnected device.

> > +		arm_smmu_ste_stage1_translate(master, ste);
> > +	} else if (master->domain &&
> > +		   master->domain->stage == ARM_SMMU_DOMAIN_S2) {
> >  		BUG_ON(ste_live);
> > +		arm_smmu_ste_stage2_translate(master, ste);
> 
> This whole bit looks nicer as one if
> 
> } else if (master->domain) {
>        	   if (master->domain->stage == ARM_SMMU_DOMAIN_S2)
> 		arm_smmu_ste_stage2_translate(master, ste);
> 	   else if (master->domain->domain.type == IOMMU_DOMAIN_IDENTITY)
> 		arm_smmu_ste_bypass(ste);
> 	   else
> 		BUG_ON()
> } else {
>     // Ugh, removing this case requires more work
> }
> 
> (Linus will not like the bug_on's btw, the function really should
> fail)

OK. Let me try that one (and removing BUG_ON).

> > +	for (i = 1; i < 4; i++) {
> > +		if (dst[i] == cpu_to_le64(ste[i]))
> > +			continue;
> > +		dst[i] = cpu_to_le64(ste[i]);
> > +		ste_sync_all = true;
> > +	}
> 
> This isn't going to work if the transition is from a fully valid STE
> to an invalid one, it will corrupt the still in-use bytes.

The driver currently doesn't have a case of unsetting STE_0_V?

> Though current code does this:
> 
> 		dst[0] = cpu_to_le64(val);
> 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
> 						STRTAB_STE_1_SHCFG_INCOMING));
> 		dst[2] = 0; /* Nuke the VMID */
> 
> Which I don't really understand either? Why is it OK to wipe the VMID
> out of order with the STE.Config change?
> 
> Be sure to read the part of the SMMU spec talking about how to update
> these things, 3.21.3.1 Configuration structure update procedure and
> nearby.
> 
> Regardless there are clearly two orders in the existing code
> 
> Write 0,1,2,flush (translation -> bypass/fault)
> 
> Write 3,2,1,flush,0,flush (bypass/fault -> translation)
> 
> You still have to preserve both behaviors.
> 
> (interestingly neither seem to follow the guidance of the ARM manual,
> so huh)

Again, it is probably because the driver never reverts the V
bit back to 0? While chapter 3.21.3.1 is about V=0 <=> V=1.

> Still, I think this should be able to become more robust in general..
> You have a current and target STE and you just need to figure out what
> order to write the bits and if a V=0 transition is needed.
> 
> The bigger question is does this have to be more generic to handle
> S1DSS which is it's original design goal?

It feels an overkill. Maybe "Refactor arm_smmu_write_strtab_ent()"
is just a too big topic for my goal...

Overall, this version doesn't really dramatically change any STE
configuration flow compared to what the current driver does, but
only adds the S1DSS bypass setting. I'd prefer keeping this in a
smaller scope..

Thanks
Nic
  
Jason Gunthorpe Sept. 26, 2023, 12:12 a.m. UTC | #3
On Mon, Sep 25, 2023 at 01:03:10PM -0700, Nicolin Chen wrote:
> On Mon, Sep 25, 2023 at 03:35:23PM -0300, Jason Gunthorpe wrote:
> > On Wed, Sep 20, 2023 at 01:52:04PM -0700, Nicolin Chen wrote:
> > >  
> > > +static void arm_smmu_ste_stage2_translate(struct arm_smmu_master *master,
> > > +					  u64 *ste)
> > > +{
> > > +	struct arm_smmu_domain *smmu_domain = master->domain;
> > > +	struct arm_smmu_device *smmu = master->smmu;
> > > +	struct arm_smmu_s2_cfg *s2_cfg;
> > > +
> > > +	switch (smmu_domain->stage) {
> > > +	case ARM_SMMU_DOMAIN_NESTED:
> > > +	case ARM_SMMU_DOMAIN_S2:
> > > +		s2_cfg = &smmu_domain->s2_cfg;
> > > +		break;
> > > +	default:
> > > +		WARN_ON(1);
> > > +		return;
> > > +	}
> > > +
> > > +	ste[0] |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
> > > +
> > > +	if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
> > > +		ste[1] |= STRTAB_STE_1_S1STALLD;
> > > +
> > > +	if (master->ats_enabled)
> > > +		ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);
> > 
> > These master bits probably belong in their own function 'setup ste for master'
> > 
> > The s1 and s2 cases are duplicating these things.
> 
> OK. I thought that writing these helpers in form of STE.Config
> field configurations could be more straightforward despite some
> duplications.

Ah, well, if you take that approach then maybe (and the names too) but
I'm not sure that is the best way..

The approach I had in mind was to go down a path depending on the
configuration of the master. eg if you have a type of domain or a cd
or whatever. That would imply a config, but not necessarily be
organized by config..

> > static void arm_smmu_clear_strtab_ent(struct arm_smmu_master *master,
> > 				      __le64 *dst)
> > {
> > 	u64 ste[4] = {};
> > 
> > 	ste[0] = STRTAB_STE_0_V;
> > 	if (disable_bypass)
> > 		arm_smmu_ste_abort(ste);
> > 	else
> > 		arm_smmu_ste_bypass(ste);
> > 	arm_smmu_store_ste(master, dst, &ste);
> > }
> > 
> > And use clear_strtab_ent from detach ??
> 
> We still need bypass() in arm_smmu_write_strtab_ent(). But this
> removes the abort() call and its confusing if-condition, I see.

Well, it sort of starts to set things up to not be domain sensitive in
this path.. Maybe it is a diversion on the path to just removing that
part of attach.
 
> > (but then I wonder why not set V=0 instead of STE.Config = abort?)
> 
> It seems that the difference is whether there would be or not a
> C_BAD_STE event, i.e. "STE.Config = abort" is a silent way for
> a disabled/disconnected device.

Makes sense

> > > +	for (i = 1; i < 4; i++) {
> > > +		if (dst[i] == cpu_to_le64(ste[i]))
> > > +			continue;
> > > +		dst[i] = cpu_to_le64(ste[i]);
> > > +		ste_sync_all = true;
> > > +	}
> > 
> > This isn't going to work if the transition is from a fully valid STE
> > to an invalid one, it will corrupt the still in-use bytes.
> 
> The driver currently doesn't have a case of unsetting STE_0_V?

Sorry, I didn't mean invalid, I ment different but valid.

 > > Though current code does this:
> > 
> > 		dst[0] = cpu_to_le64(val);
> > 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
> > 						STRTAB_STE_1_SHCFG_INCOMING));
> > 		dst[2] = 0; /* Nuke the VMID */
> > 
> > Which I don't really understand either? Why is it OK to wipe the VMID
> > out of order with the STE.Config change?
> > 
> > Be sure to read the part of the SMMU spec talking about how to update
> > these things, 3.21.3.1 Configuration structure update procedure and
> > nearby.
> > 
> > Regardless there are clearly two orders in the existing code
> > 
> > Write 0,1,2,flush (translation -> bypass/fault)
> > 
> > Write 3,2,1,flush,0,flush (bypass/fault -> translation)
> > 
> > You still have to preserve both behaviors.
> > 
> > (interestingly neither seem to follow the guidance of the ARM manual,
> > so huh)
> 
> Again, it is probably because the driver never reverts the V
> bit back to 0? While chapter 3.21.3.1 is about V=0 <=> V=1.

No, the driver is using the config in a similar way to V. From what I
can tell it is making a bunch of assumptions that allow this to be OK,
but you have to follow the ordering it already has..


> > The bigger question is does this have to be more generic to handle
> > S1DSS which is it's original design goal?
> 
> It feels an overkill. Maybe "Refactor arm_smmu_write_strtab_ent()"
> is just a too big topic for my goal...

Maybe, it depends if it is necessary
 
> Overall, this version doesn't really dramatically change any STE
> configuration flow compared to what the current driver does, but
> only adds the S1DSS bypass setting. I'd prefer keeping this in a
> smaller scope..

Well, no, this stuff does seem to change the allowed state transitions
that this routine will encounter, or at the very least it sets the
stage for new state transitions that it cannot handle (eg under
iommufd control w/PASID we have problems)

However.. if you imagine staying within the existing kernel driver
behavior maybe just setting S1DSS does work out. It feels very
fragile, it depends on alot of other stuff also being just so.

So, at least for this series you might get buy without, but do check
all the different combinations of attachment's that are possible and
see that the STE doesn't become incorrect.

If it is OK then this patch can be its own series, it needs doing
anyhow.

Jason
  
Nicolin Chen Sept. 26, 2023, 1:52 a.m. UTC | #4
On Mon, Sep 25, 2023 at 09:12:20PM -0300, Jason Gunthorpe wrote:
> On Mon, Sep 25, 2023 at 01:03:10PM -0700, Nicolin Chen wrote:
> > On Mon, Sep 25, 2023 at 03:35:23PM -0300, Jason Gunthorpe wrote:
> > > On Wed, Sep 20, 2023 at 01:52:04PM -0700, Nicolin Chen wrote:
> > > >  
> > > > +static void arm_smmu_ste_stage2_translate(struct arm_smmu_master *master,
> > > > +					  u64 *ste)
> > > > +{
> > > > +	struct arm_smmu_domain *smmu_domain = master->domain;
> > > > +	struct arm_smmu_device *smmu = master->smmu;
> > > > +	struct arm_smmu_s2_cfg *s2_cfg;
> > > > +
> > > > +	switch (smmu_domain->stage) {
> > > > +	case ARM_SMMU_DOMAIN_NESTED:
> > > > +	case ARM_SMMU_DOMAIN_S2:
> > > > +		s2_cfg = &smmu_domain->s2_cfg;
> > > > +		break;
> > > > +	default:
> > > > +		WARN_ON(1);
> > > > +		return;
> > > > +	}
> > > > +
> > > > +	ste[0] |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
> > > > +
> > > > +	if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
> > > > +		ste[1] |= STRTAB_STE_1_S1STALLD;
> > > > +
> > > > +	if (master->ats_enabled)
> > > > +		ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);
> > > 
> > > These master bits probably belong in their own function 'setup ste for master'
> > > 
> > > The s1 and s2 cases are duplicating these things.
> > 
> > OK. I thought that writing these helpers in form of STE.Config
> > field configurations could be more straightforward despite some
> > duplications.
> 
> Ah, well, if you take that approach then maybe (and the names too) but
> I'm not sure that is the best way..
> 
> The approach I had in mind was to go down a path depending on the
> configuration of the master. eg if you have a type of domain or a cd
> or whatever. That would imply a config, but not necessarily be
> organized by config..

This sounds pretty much like what arm_smmu_write_strtab_ent() is
already doing, but you just want some tidy reorganizations?

So, by separating domain=NULL case to clear_ste(), we could do:

if (cdtab)
	setup_ste_by_cdtab(cdtab, domain); // still needs domain :-/
else if (master->domain->stage == S2)
	setup_ste_by_domain(domain); // literally "by s2_cfg"
else
	setup_ste_bypass();

setup_ste_by_master(); // include this in by_cdtab/by_domain?

> > > > +	for (i = 1; i < 4; i++) {
> > > > +		if (dst[i] == cpu_to_le64(ste[i]))
> > > > +			continue;
> > > > +		dst[i] = cpu_to_le64(ste[i]);
> > > > +		ste_sync_all = true;
> > > > +	}
> > > 
> > > This isn't going to work if the transition is from a fully valid STE
> > > to an invalid one, it will corrupt the still in-use bytes.
> > 
> > The driver currently doesn't have a case of unsetting STE_0_V?
> 
> Sorry, I didn't mean invalid, I ment different but valid.

Then you meant a translation from valid to another valid will
be corrupted? Though that's how the driver currently switches
between valid STE configurations by staging the STE to bypass
or abort mode via detach_dev()?

>  > > Though current code does this:
> > > 
> > > 		dst[0] = cpu_to_le64(val);
> > > 		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
> > > 						STRTAB_STE_1_SHCFG_INCOMING));
> > > 		dst[2] = 0; /* Nuke the VMID */
> > > 
> > > Which I don't really understand either? Why is it OK to wipe the VMID
> > > out of order with the STE.Config change?
> > > 
> > > Be sure to read the part of the SMMU spec talking about how to update
> > > these things, 3.21.3.1 Configuration structure update procedure and
> > > nearby.
> > > 
> > > Regardless there are clearly two orders in the existing code
> > > 
> > > Write 0,1,2,flush (translation -> bypass/fault)
> > > 
> > > Write 3,2,1,flush,0,flush (bypass/fault -> translation)
> > > 
> > > You still have to preserve both behaviors.
> > > 
> > > (interestingly neither seem to follow the guidance of the ARM manual,
> > > so huh)
> > 
> > Again, it is probably because the driver never reverts the V
> > bit back to 0? While chapter 3.21.3.1 is about V=0 <=> V=1.
> 
> No, the driver is using the config in a similar way to V. From what I
> can tell it is making a bunch of assumptions that allow this to be OK,
> but you have to follow the ordering it already has..

The "ordering" in the driver or spec?

I found Robin previous remarks around this topic"
"Strictly I think we are safe to do that - fill in all the S1* fields
 while Config[0] is still 0 and they're ignored, sync, then set
 Config[0]. Adding a CD table under a translation domain should be
 achievable as well, since S1CDMax, S1ContextPtr and S1Fmt can all be
 updated together atomically (although it's still the kind of switcheroo
 where I'd be scared of a massive boulder suddenly rolling out of the
 ceiling...)"

I think this answers most of the questions above?

> > > The bigger question is does this have to be more generic to handle
> > > S1DSS which is it's original design goal?
> > 
> > It feels an overkill. Maybe "Refactor arm_smmu_write_strtab_ent()"
> > is just a too big topic for my goal...
> 
> Maybe, it depends if it is necessary
>  
> > Overall, this version doesn't really dramatically change any STE
> > configuration flow compared to what the current driver does, but
> > only adds the S1DSS bypass setting. I'd prefer keeping this in a
> > smaller scope..
> 
> Well, no, this stuff does seem to change the allowed state transitions
> that this routine will encounter, or at the very least it sets the
> stage for new state transitions that it cannot handle (eg under
> iommufd control w/PASID we have problems)
> 
> However.. if you imagine staying within the existing kernel driver
> behavior maybe just setting S1DSS does work out. It feels very
> fragile, it depends on alot of other stuff also being just so.
> 
> So, at least for this series you might get buy without, but do check
> all the different combinations of attachment's that are possible and
> see that the STE doesn't become incorrect.
> 
> If it is OK then this patch can be its own series, it needs doing
> anyhow.

Fine.. I can try that then. It looks that we have quite a thing
to do before I can respin the vSMMU series. 

Thanks!
Nic
  

Patch

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index dbe11997b4b9..ea0724975d63 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -1248,6 +1248,91 @@  static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
 }
 
+static void arm_smmu_ste_stage2_translate(struct arm_smmu_master *master,
+					  u64 *ste)
+{
+	struct arm_smmu_domain *smmu_domain = master->domain;
+	struct arm_smmu_device *smmu = master->smmu;
+	struct arm_smmu_s2_cfg *s2_cfg;
+
+	switch (smmu_domain->stage) {
+	case ARM_SMMU_DOMAIN_NESTED:
+	case ARM_SMMU_DOMAIN_S2:
+		s2_cfg = &smmu_domain->s2_cfg;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	ste[0] |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
+
+	if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
+		ste[1] |= STRTAB_STE_1_S1STALLD;
+
+	if (master->ats_enabled)
+		ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);
+
+	ste[2] |= FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
+		  FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
+#ifdef __BIG_ENDIAN
+		  STRTAB_STE_2_S2ENDI |
+#endif
+		  STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2R;
+
+	ste[3] |= s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK;
+}
+
+static void arm_smmu_ste_stage1_translate(struct arm_smmu_master *master,
+					  u64 *ste)
+{
+	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
+	struct arm_smmu_device *smmu = master->smmu;
+	__le64 *cdptr = arm_smmu_get_cd_ptr(master, 0);
+
+	WARN_ON_ONCE(!cdptr);
+
+	ste[0] |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+		  FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
+		  FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
+		  FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
+
+	if (FIELD_GET(CTXDESC_CD_0_ASID, le64_to_cpu(cdptr[0])))
+		ste[1] |= FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0);
+	else
+		ste[1] |= FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_BYPASS);
+
+	ste[1] |= FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING) |
+		  FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+		  FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
+		  FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH);
+
+	if (smmu->features & ARM_SMMU_FEAT_E2H)
+		ste[1] |= FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2);
+	else
+		ste[1] |= FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1);
+
+	if (smmu->features & ARM_SMMU_FEAT_STALLS && !master->stall_enabled)
+		ste[1] |= STRTAB_STE_1_S1STALLD;
+
+	if (master->ats_enabled)
+		ste[1] |= FIELD_PREP(STRTAB_STE_1_EATS, STRTAB_STE_1_EATS_TRANS);
+
+	if (master->domain->stage == ARM_SMMU_DOMAIN_NESTED)
+		arm_smmu_ste_stage2_translate(master, ste);
+}
+
+static void arm_smmu_ste_abort(u64 *ste)
+{
+	ste[0] |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
+}
+
+static void arm_smmu_ste_bypass(u64 *ste)
+{
+	ste[0] |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
+	ste[1] |= FIELD_PREP(STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING);
+}
+
 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 				      __le64 *dst)
 {
@@ -1267,12 +1352,11 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
 	 * 3. Update Config, sync
 	 */
-	u64 val = le64_to_cpu(dst[0]);
+	int i;
+	u64 ste[4] = {0};
+	bool ste_sync_all = false;
 	bool ste_live = false;
-	struct arm_smmu_device *smmu = NULL;
-	struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
-	struct arm_smmu_s2_cfg *s2_cfg = NULL;
-	struct arm_smmu_domain *smmu_domain = NULL;
+	struct arm_smmu_device *smmu = master->smmu;
 	struct arm_smmu_cmdq_ent prefetch_cmd = {
 		.opcode		= CMDQ_OP_PREFETCH_CFG,
 		.prefetch	= {
@@ -1280,27 +1364,8 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 		},
 	};
 
-	if (master) {
-		smmu_domain = master->domain;
-		smmu = master->smmu;
-	}
-
-	if (smmu_domain) {
-		switch (smmu_domain->stage) {
-		case ARM_SMMU_DOMAIN_S1:
-			cd_table = &master->cd_table;
-			break;
-		case ARM_SMMU_DOMAIN_S2:
-		case ARM_SMMU_DOMAIN_NESTED:
-			s2_cfg = &smmu_domain->s2_cfg;
-			break;
-		default:
-			break;
-		}
-	}
-
-	if (val & STRTAB_STE_0_V) {
-		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
+	if (le64_to_cpu(dst[0]) & STRTAB_STE_0_V) {
+		switch (FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(dst[0]))) {
 		case STRTAB_STE_0_CFG_BYPASS:
 			break;
 		case STRTAB_STE_0_CFG_S1_TRANS:
@@ -1315,74 +1380,32 @@  static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 		}
 	}
 
-	/* Nuke the existing STE_0 value, as we're going to rewrite it */
-	val = STRTAB_STE_0_V;
-
-	/* Bypass/fault */
-	if (!smmu_domain || !(cd_table || s2_cfg)) {
-		if (!smmu_domain && disable_bypass)
-			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
-		else
-			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
-
-		dst[0] = cpu_to_le64(val);
-		dst[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
-						STRTAB_STE_1_SHCFG_INCOMING));
-		dst[2] = 0; /* Nuke the VMID */
-		/*
-		 * The SMMU can perform negative caching, so we must sync
-		 * the STE regardless of whether the old value was live.
-		 */
-		if (smmu)
-			arm_smmu_sync_ste_for_sid(smmu, sid);
-		return;
-	}
-
-	if (cd_table) {
-		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
-			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
+	ste[0] = STRTAB_STE_0_V;
 
+	if (master->cd_table.cdtab && master->domain) {
 		BUG_ON(ste_live);
-		dst[1] = cpu_to_le64(
-			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
-			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
-			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
-			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
-			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
-
-		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
-		    !master->stall_enabled)
-			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
-
-		val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
-			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
-			FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
-			FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
-	}
-
-	if (s2_cfg) {
+		arm_smmu_ste_stage1_translate(master, ste);
+	} else if (master->domain &&
+		   master->domain->stage == ARM_SMMU_DOMAIN_S2) {
 		BUG_ON(ste_live);
-		dst[2] = cpu_to_le64(
-			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
-			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
-#ifdef __BIG_ENDIAN
-			 STRTAB_STE_2_S2ENDI |
-#endif
-			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
-			 STRTAB_STE_2_S2R);
-
-		dst[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
-
-		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
+		arm_smmu_ste_stage2_translate(master, ste);
+	} else if (!master->domain && disable_bypass) { /* Master is detached */
+		arm_smmu_ste_abort(ste);
+	} else {
+		arm_smmu_ste_bypass(ste);
 	}
 
-	if (master->ats_enabled)
-		dst[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
-						 STRTAB_STE_1_EATS_TRANS));
+	for (i = 1; i < 4; i++) {
+		if (dst[i] == cpu_to_le64(ste[i]))
+			continue;
+		dst[i] = cpu_to_le64(ste[i]);
+		ste_sync_all = true;
+	}
 
-	arm_smmu_sync_ste_for_sid(smmu, sid);
+	if (ste_sync_all)
+		arm_smmu_sync_ste_for_sid(smmu, sid);
 	/* See comment in arm_smmu_write_ctx_desc() */
-	WRITE_ONCE(dst[0], cpu_to_le64(val));
+	WRITE_ONCE(dst[0], cpu_to_le64(ste[0]));
 	arm_smmu_sync_ste_for_sid(smmu, sid);
 
 	/* It's likely that we'll want to use the new STE soon */