[v3,2/9] cxl/acpi: Extract component registers of restricted hosts from RCRB

Message ID 20221109104059.766720-3-rrichter@amd.com
State New
Headers
Series cxl: Add support for Restricted CXL hosts (RCD mode) |

Commit Message

Robert Richter Nov. 9, 2022, 10:40 a.m. UTC
A downstream port must be connected to a component register block.
For restricted hosts the base address is determined from the RCRB. The
RCRB is provided by the host's CEDT CHBS entry. Rework CEDT parser to
get the RCRB and add code to extract the component register block from
it.

RCRB's BAR[0..1] point to the component block containing CXL subsystem
component registers. MEMBAR extraction follows the PCI base spec here,
esp. 64 bit extraction and memory range alignment (6.0, 7.5.1.2.1).

Note: Right now the component register block is used for HDM decoder
capability only which is optional for RCDs. If unsupported by the RCD,
the HDM init will fail. It is future work to bypass it in this case.

Co-developed-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Robert Richter <rrichter@amd.com>
---
 drivers/cxl/acpi.c      | 43 +++++++++++++++++++++++++++++---------
 drivers/cxl/core/regs.c | 46 +++++++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h       |  8 +++++++
 3 files changed, 87 insertions(+), 10 deletions(-)
  

Comments

Dan Williams Nov. 14, 2022, 9:30 p.m. UTC | #1
Robert Richter wrote:
> A downstream port must be connected to a component register block.
> For restricted hosts the base address is determined from the RCRB. The
> RCRB is provided by the host's CEDT CHBS entry. Rework CEDT parser to
> get the RCRB and add code to extract the component register block from
> it.
> 
> RCRB's BAR[0..1] point to the component block containing CXL subsystem
> component registers. MEMBAR extraction follows the PCI base spec here,
> esp. 64 bit extraction and memory range alignment (6.0, 7.5.1.2.1).
> 
> Note: Right now the component register block is used for HDM decoder
> capability only which is optional for RCDs. If unsupported by the RCD,
> the HDM init will fail. It is future work to bypass it in this case.
> 
> Co-developed-by: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Robert Richter <rrichter@amd.com>
> ---
>  drivers/cxl/acpi.c      | 43 +++++++++++++++++++++++++++++---------
>  drivers/cxl/core/regs.c | 46 +++++++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h       |  8 +++++++
>  3 files changed, 87 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index 06150c953f58..caea42cf9522 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -9,6 +9,8 @@
>  #include "cxlpci.h"
>  #include "cxl.h"
>  
> +#define CXL_RCRB_SIZE	SZ_8K
> +
>  static unsigned long cfmws_to_decoder_flags(int restrictions)
>  {
>  	unsigned long flags = CXL_DECODER_F_ENABLE;
> @@ -240,27 +242,46 @@ static int add_host_bridge_uport(struct device *match, void *arg)
>  struct cxl_chbs_context {
>  	struct device *dev;
>  	unsigned long long uid;
> -	resource_size_t chbcr;
> +	struct acpi_cedt_chbs chbs;
>  };
>  
> -static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
> -			 const unsigned long end)
> +static int cxl_get_chbs(union acpi_subtable_headers *header, void *arg,
> +			const unsigned long end)
>  {
>  	struct cxl_chbs_context *ctx = arg;
>  	struct acpi_cedt_chbs *chbs;
>  
> -	if (ctx->chbcr)
> +	if (ctx->chbs.base)
>  		return 0;
>  
>  	chbs = (struct acpi_cedt_chbs *) header;
>  
>  	if (ctx->uid != chbs->uid)
>  		return 0;
> -	ctx->chbcr = chbs->base;
> +	ctx->chbs = *chbs;
>  
>  	return 0;
>  }
>  
> +static resource_size_t cxl_get_chbcr(struct cxl_chbs_context *ctx)
> +{
> +	struct acpi_cedt_chbs *chbs = &ctx->chbs;
> +
> +	if (!chbs->base)
> +		return CXL_RESOURCE_NONE;
> +
> +	if (chbs->cxl_version != ACPI_CEDT_CHBS_VERSION_CXL11)
> +		return chbs->base;
> +
> +	if (chbs->length != CXL_RCRB_SIZE)
> +		return CXL_RESOURCE_NONE;
> +
> +	dev_dbg(ctx->dev, "RCRB found for UID %lld: 0x%08llx\n",
> +		ctx->uid, (u64)chbs->base);
> +
> +	return cxl_rcrb_to_component(ctx->dev, chbs->base, CXL_RCRB_DOWNSTREAM);
> +}
> +
>  static int add_host_bridge_dport(struct device *match, void *arg)
>  {
>  	acpi_status status;
> @@ -272,6 +293,7 @@ static int add_host_bridge_dport(struct device *match, void *arg)
>  	struct acpi_pci_root *pci_root = to_cxl_pci_root(host, match);
>  	struct device *bridge;
>  	acpi_handle handle;
> +	resource_size_t component_reg_phys;
>  
>  	if (!pci_root)
>  		return 0;
> @@ -287,19 +309,20 @@ static int add_host_bridge_dport(struct device *match, void *arg)
>  	dev_dbg(match, "UID found: %lld\n", uid);
>  
>  	ctx = (struct cxl_chbs_context) {
> -		.dev = host,
> +		.dev = match,
>  		.uid = uid,
>  	};
> -	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbcr, &ctx);
> +	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbs, &ctx);
>  
> -	if (ctx.chbcr == 0) {
> +	component_reg_phys = cxl_get_chbcr(&ctx);
> +	if (component_reg_phys == CXL_RESOURCE_NONE) {
>  		dev_warn(match, "No CHBS found for Host Bridge (UID %lld)\n", uid);
>  		return 0;
>  	}
>  
> -	dev_dbg(match, "CHBCR found: 0x%08llx\n", (u64)ctx.chbcr);
> +	dev_dbg(match, "CHBCR found: 0x%08llx\n", (u64)component_reg_phys);
>  
> -	dport = devm_cxl_add_dport(root_port, bridge, uid, ctx.chbcr);
> +	dport = devm_cxl_add_dport(root_port, bridge, uid, component_reg_phys);
>  	if (IS_ERR(dport))
>  		return PTR_ERR(dport);
>  
> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> index ec178e69b18f..7a5bde81e949 100644
> --- a/drivers/cxl/core/regs.c
> +++ b/drivers/cxl/core/regs.c
> @@ -307,3 +307,49 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
>  	return -ENODEV;
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
> +
> +resource_size_t cxl_rcrb_to_component(struct device *dev,
> +				      resource_size_t rcrb,
> +				      enum cxl_rcrb which)
> +{
> +	resource_size_t component_reg_phys;
> +	u32 bar0, bar1;
> +	void *addr;
> +
> +	if (which == CXL_RCRB_UPSTREAM)
> +		rcrb += SZ_4K;
> +
> +	/*
> +	 * RCRB's BAR[0..1] point to component block containing CXL
> +	 * subsystem component registers. MEMBAR extraction follows
> +	 * the PCI Base spec here, esp. 64 bit extraction and memory
> +	 * ranges alignment (6.0, 7.5.1.2.1).
> +	 */

A request_mem_region() is needed here to ensure ownership and expected
sequencing of accessing the RCRB to locate the component registers, and
accessing the RCRB to manipulate the component registers. It also helps
to sanity check that the BIOS mapped an exclusive range for the RCRB.

> +	addr = ioremap(rcrb, PCI_BASE_ADDRESS_0 + SZ_8);

That PCI_BASE_ADDRESS_0 does not belong there. It ends up being benign
and forcing ioremap to map 12K instead of 8K, but it is a
config-register offset, not part of the RCRB size.

> +	if (!addr) {
> +		dev_err(dev, "Failed to map region %pr\n", addr);
> +		return CXL_RESOURCE_NONE;
> +	}
> +
> +	bar0 = readl(addr + PCI_BASE_ADDRESS_0);
> +	bar1 = readl(addr + PCI_BASE_ADDRESS_1);
> +	iounmap(addr);

...corresponding release_mem_region() would go here.

> +
> +	/* sanity check */
> +	if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO))
> +		return CXL_RESOURCE_NONE;

I would have also expected:

- a sanity check for "Memory Space Enable" being set in the command
  register.

- an explicit check for 0xffffffff for the case when the upstream-port
  implements "no RCRB" mode.

- some check that BIOS initialized the BAR values post reset given these
  BARs are invisible to the PCI core resource assignment
  
Robert Richter Nov. 15, 2022, 12:17 p.m. UTC | #2
On 14.11.22 13:30:01, Dan Williams wrote:
> Robert Richter wrote:

> > diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> > index ec178e69b18f..7a5bde81e949 100644
> > --- a/drivers/cxl/core/regs.c
> > +++ b/drivers/cxl/core/regs.c
> > @@ -307,3 +307,49 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
> >  	return -ENODEV;
> >  }
> >  EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
> > +
> > +resource_size_t cxl_rcrb_to_component(struct device *dev,
> > +				      resource_size_t rcrb,
> > +				      enum cxl_rcrb which)
> > +{
> > +	resource_size_t component_reg_phys;
> > +	u32 bar0, bar1;
> > +	void *addr;
> > +
> > +	if (which == CXL_RCRB_UPSTREAM)
> > +		rcrb += SZ_4K;
> > +
> > +	/*
> > +	 * RCRB's BAR[0..1] point to component block containing CXL
> > +	 * subsystem component registers. MEMBAR extraction follows
> > +	 * the PCI Base spec here, esp. 64 bit extraction and memory
> > +	 * ranges alignment (6.0, 7.5.1.2.1).
> > +	 */
> 
> A request_mem_region() is needed here to ensure ownership and expected
> sequencing of accessing the RCRB to locate the component registers, and
> accessing the RCRB to manipulate the component registers. It also helps
> to sanity check that the BIOS mapped an exclusive range for the RCRB.

Right, that is missing.

> 
> > +	addr = ioremap(rcrb, PCI_BASE_ADDRESS_0 + SZ_8);
> 
> That PCI_BASE_ADDRESS_0 does not belong there. It ends up being benign
> and forcing ioremap to map 12K instead of 8K, but it is a
> config-register offset, not part of the RCRB size.

Note this is BAR0 + 8 bytes, not 8k, and it does not map the whole
RCRB region but instead the first part of the config space up to
including the 64 bit BAR.

> 
> > +	if (!addr) {
> > +		dev_err(dev, "Failed to map region %pr\n", addr);
> > +		return CXL_RESOURCE_NONE;
> > +	}
> > +
> > +	bar0 = readl(addr + PCI_BASE_ADDRESS_0);
> > +	bar1 = readl(addr + PCI_BASE_ADDRESS_1);
> > +	iounmap(addr);
> 
> ...corresponding release_mem_region() would go here.
> 
> > +
> > +	/* sanity check */
> > +	if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO))
> > +		return CXL_RESOURCE_NONE;
> 
> I would have also expected:
> 
> - a sanity check for "Memory Space Enable" being set in the command
>   register.

Ok.

> 
> - an explicit check for 0xffffffff for the case when the upstream-port
>   implements "no RCRB" mode.

Yes, I left support for this to a later patch, but it's better to
check it here already and possibly fall back to reg loc DVSEC then.

> 
> - some check that BIOS initialized the BAR values post reset given these
>   BARs are invisible to the PCI core resource assignment 

What check do you have in mind here? There is already the NULL check
which would be the out-of-reset value.

Thanks,

-Robert
  
Dan Williams Nov. 15, 2022, 5:54 p.m. UTC | #3
Robert Richter wrote:
> On 14.11.22 13:30:01, Dan Williams wrote:
> > Robert Richter wrote:
> 
> > > diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> > > index ec178e69b18f..7a5bde81e949 100644
> > > --- a/drivers/cxl/core/regs.c
> > > +++ b/drivers/cxl/core/regs.c
> > > @@ -307,3 +307,49 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
> > >  	return -ENODEV;
> > >  }
> > >  EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
> > > +
> > > +resource_size_t cxl_rcrb_to_component(struct device *dev,
> > > +				      resource_size_t rcrb,
> > > +				      enum cxl_rcrb which)
> > > +{
> > > +	resource_size_t component_reg_phys;
> > > +	u32 bar0, bar1;
> > > +	void *addr;
> > > +
> > > +	if (which == CXL_RCRB_UPSTREAM)
> > > +		rcrb += SZ_4K;
> > > +
> > > +	/*
> > > +	 * RCRB's BAR[0..1] point to component block containing CXL
> > > +	 * subsystem component registers. MEMBAR extraction follows
> > > +	 * the PCI Base spec here, esp. 64 bit extraction and memory
> > > +	 * ranges alignment (6.0, 7.5.1.2.1).
> > > +	 */
> > 
> > A request_mem_region() is needed here to ensure ownership and expected
> > sequencing of accessing the RCRB to locate the component registers, and
> > accessing the RCRB to manipulate the component registers. It also helps
> > to sanity check that the BIOS mapped an exclusive range for the RCRB.
> 
> Right, that is missing.
> 
> > 
> > > +	addr = ioremap(rcrb, PCI_BASE_ADDRESS_0 + SZ_8);
> > 
> > That PCI_BASE_ADDRESS_0 does not belong there. It ends up being benign
> > and forcing ioremap to map 12K instead of 8K, but it is a
> > config-register offset, not part of the RCRB size.
> 
> Note this is BAR0 + 8 bytes, not 8k, and it does not map the whole
> RCRB region but instead the first part of the config space up to
> including the 64 bit BAR.

Oh, sorry, yes, my mistake. However, there is not much value in mapping
less than 4K since all ioremap requests are rounded up to PAGE_SIZE.
Since an RCRB is only 4K per port let's just map the whole thing.

> > > +	if (!addr) {
> > > +		dev_err(dev, "Failed to map region %pr\n", addr);
> > > +		return CXL_RESOURCE_NONE;
> > > +	}
> > > +
> > > +	bar0 = readl(addr + PCI_BASE_ADDRESS_0);
> > > +	bar1 = readl(addr + PCI_BASE_ADDRESS_1);
> > > +	iounmap(addr);
> > 
> > ...corresponding release_mem_region() would go here.
> > 
> > > +
> > > +	/* sanity check */
> > > +	if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO))
> > > +		return CXL_RESOURCE_NONE;
> > 
> > I would have also expected:
> > 
> > - a sanity check for "Memory Space Enable" being set in the command
> >   register.
> 
> Ok.
> 
> > 
> > - an explicit check for 0xffffffff for the case when the upstream-port
> >   implements "no RCRB" mode.
> 
> Yes, I left support for this to a later patch, but it's better to
> check it here already and possibly fall back to reg loc DVSEC then.

Yeah, I think simply failing on 0xffffffff is sufficient for now.

> > 
> > - some check that BIOS initialized the BAR values post reset given these
> >   BARs are invisible to the PCI core resource assignment 
> 
> What check do you have in mind here? There is already the NULL check
> which would be the out-of-reset value.

I was thinking more along the lines of sanity checking that the
programmed RCRB range falls within the assigned MMIO space of the
host-bridge, but perhaps that is overkill since it would just be
validating self consistency between 2 BIOS provided values. Robustness
principle would say try to continue if those disagree.
  
Robert Richter Nov. 17, 2022, 12:43 p.m. UTC | #4
On 15.11.22 09:54:16, Dan Williams wrote:
> Robert Richter wrote:
> > On 14.11.22 13:30:01, Dan Williams wrote:
> > > Robert Richter wrote:
> > 
> > > > diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> > > > index ec178e69b18f..7a5bde81e949 100644
> > > > --- a/drivers/cxl/core/regs.c
> > > > +++ b/drivers/cxl/core/regs.c
> > > > @@ -307,3 +307,49 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
> > > >  	return -ENODEV;
> > > >  }
> > > >  EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
> > > > +
> > > > +resource_size_t cxl_rcrb_to_component(struct device *dev,
> > > > +				      resource_size_t rcrb,
> > > > +				      enum cxl_rcrb which)
> > > > +{
> > > > +	resource_size_t component_reg_phys;
> > > > +	u32 bar0, bar1;
> > > > +	void *addr;
> > > > +
> > > > +	if (which == CXL_RCRB_UPSTREAM)
> > > > +		rcrb += SZ_4K;
> > > > +
> > > > +	/*
> > > > +	 * RCRB's BAR[0..1] point to component block containing CXL
> > > > +	 * subsystem component registers. MEMBAR extraction follows
> > > > +	 * the PCI Base spec here, esp. 64 bit extraction and memory
> > > > +	 * ranges alignment (6.0, 7.5.1.2.1).
> > > > +	 */
> > > 
> > > A request_mem_region() is needed here to ensure ownership and expected
> > > sequencing of accessing the RCRB to locate the component registers, and
> > > accessing the RCRB to manipulate the component registers. It also helps
> > > to sanity check that the BIOS mapped an exclusive range for the RCRB.
> > 
> > Right, that is missing.
> > 
> > > 
> > > > +	addr = ioremap(rcrb, PCI_BASE_ADDRESS_0 + SZ_8);
> > > 
> > > That PCI_BASE_ADDRESS_0 does not belong there. It ends up being benign
> > > and forcing ioremap to map 12K instead of 8K, but it is a
> > > config-register offset, not part of the RCRB size.
> > 
> > Note this is BAR0 + 8 bytes, not 8k, and it does not map the whole
> > RCRB region but instead the first part of the config space up to
> > including the 64 bit BAR.
> 
> Oh, sorry, yes, my mistake. However, there is not much value in mapping
> less than 4K since all ioremap requests are rounded up to PAGE_SIZE.
> Since an RCRB is only 4K per port lets just map the whole thing.

I was going to keep the ranges small to avoid conflicts with other
requests for the same page (though request_mem_region() was still
missing).

> 
> > > > +	if (!addr) {
> > > > +		dev_err(dev, "Failed to map region %pr\n", addr);
> > > > +		return CXL_RESOURCE_NONE;
> > > > +	}
> > > > +
> > > > +	bar0 = readl(addr + PCI_BASE_ADDRESS_0);
> > > > +	bar1 = readl(addr + PCI_BASE_ADDRESS_1);
> > > > +	iounmap(addr);
> > > 
> > > ...corresponding release_mem_region() would go here.
> > > 
> > > > +
> > > > +	/* sanity check */
> > > > +	if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO))
> > > > +		return CXL_RESOURCE_NONE;
> > > 
> > > I would have also expected:
> > > 
> > > - a sanity check for "Memory Space Enable" being set in the command
> > >   register.
> > 
> > Ok.
> > 
> > > 
> > > - an explicit check for 0xffffffff for the case when the upstream-port
> > >   implements "no RCRB" mode.
> > 
> > Yes, I left support for this to a later patch, but it's better to
> > check it here already and possibly fall back to reg loc DVSEC then.
> 
> Yeah, I think simply failing on 0xffffffff is sufficient for now.
> 
> > > 
> > > - some check that BIOS initialized the BAR values post reset given these
> > >   BARs are invisible to the PCI core resource assignment 
> > 
> > What check do you have in mind here? There is already the NULL check
> > which would be the out-of-reset value.
> 
> I was thinking more along the lines of sanity checking that the
> programmed RCRB range falls within the assigned MMIO space of the
> host-bridge, but perhaps that is overkill since it would just be
> validating self consistency between 2 BIOS provided values. Robustness
> principle would say try to continue if those disagree.

Ok, will drop a check here.

Thanks,

-Robert
  
Dan Williams Nov. 17, 2022, 5:20 p.m. UTC | #5
Robert Richter wrote:
> On 15.11.22 09:54:16, Dan Williams wrote:
> > Robert Richter wrote:
> > > On 14.11.22 13:30:01, Dan Williams wrote:
> > > > Robert Richter wrote:
> > > 
> > > > > diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> > > > > index ec178e69b18f..7a5bde81e949 100644
> > > > > --- a/drivers/cxl/core/regs.c
> > > > > +++ b/drivers/cxl/core/regs.c
> > > > > @@ -307,3 +307,49 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
> > > > >  	return -ENODEV;
> > > > >  }
> > > > >  EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
> > > > > +
> > > > > +resource_size_t cxl_rcrb_to_component(struct device *dev,
> > > > > +				      resource_size_t rcrb,
> > > > > +				      enum cxl_rcrb which)
> > > > > +{
> > > > > +	resource_size_t component_reg_phys;
> > > > > +	u32 bar0, bar1;
> > > > > +	void *addr;
> > > > > +
> > > > > +	if (which == CXL_RCRB_UPSTREAM)
> > > > > +		rcrb += SZ_4K;
> > > > > +
> > > > > +	/*
> > > > > +	 * RCRB's BAR[0..1] point to component block containing CXL
> > > > > +	 * subsystem component registers. MEMBAR extraction follows
> > > > > +	 * the PCI Base spec here, esp. 64 bit extraction and memory
> > > > > +	 * ranges alignment (6.0, 7.5.1.2.1).
> > > > > +	 */
> > > > 
> > > > A request_mem_region() is needed here to ensure ownership and expected
> > > > sequencing of accessing the RCRB to locate the component registers, and
> > > > accessing the RCRB to manipulate the component registers. It also helps
> > > > to sanity check that the BIOS mapped an exclusive range for the RCRB.
> > > 
> > > Right, that is missing.
> > > 
> > > > 
> > > > > +	addr = ioremap(rcrb, PCI_BASE_ADDRESS_0 + SZ_8);
> > > > 
> > > > That PCI_BASE_ADDRESS_0 does not belong there. It ends up being benign
> > > > and forcing ioremap to map 12K instead of 8K, but it is a
> > > > config-register offset, not part of the RCRB size.
> > > 
> > > Note this is BAR0 + 8 bytes, not 8k, and it does not map the whole
> > > RCRB region but instead the first part of the config space up to
> > > including the 64 bit BAR.
> > 
> > Oh, sorry, yes, my mistake. However, there is not much value in mapping
> > less than 4K since all ioremap requests are rounded up to PAGE_SIZE.
> > Since an RCRB is only 4K per port lets just map the whole thing.
> 
> I was going to keep the ranges small to avoid conflicts with other
> requests for the same page (though request_mem_region() was missing
> yet).

What else will be conflicting with the RCRB? Linux has never accessed an RCRB
in the past as far as I can see. If there is a conflict then we may need
to move this mapping to the PCI core so that it is managed like other
mmconf space.
  
Robert Richter Nov. 17, 2022, 6:25 p.m. UTC | #6
On 17.11.22 09:20:55, Dan Williams wrote:
> Robert Richter wrote:
> > On 15.11.22 09:54:16, Dan Williams wrote:
> > > Robert Richter wrote:
> > > > On 14.11.22 13:30:01, Dan Williams wrote:
> > > > > Robert Richter wrote:
> > > > 
> > > > > > diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> > > > > > index ec178e69b18f..7a5bde81e949 100644
> > > > > > --- a/drivers/cxl/core/regs.c
> > > > > > +++ b/drivers/cxl/core/regs.c
> > > > > > @@ -307,3 +307,49 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
> > > > > >  	return -ENODEV;
> > > > > >  }
> > > > > >  EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
> > > > > > +
> > > > > > +resource_size_t cxl_rcrb_to_component(struct device *dev,
> > > > > > +				      resource_size_t rcrb,
> > > > > > +				      enum cxl_rcrb which)
> > > > > > +{
> > > > > > +	resource_size_t component_reg_phys;
> > > > > > +	u32 bar0, bar1;
> > > > > > +	void *addr;
> > > > > > +
> > > > > > +	if (which == CXL_RCRB_UPSTREAM)
> > > > > > +		rcrb += SZ_4K;
> > > > > > +
> > > > > > +	/*
> > > > > > +	 * RCRB's BAR[0..1] point to component block containing CXL
> > > > > > +	 * subsystem component registers. MEMBAR extraction follows
> > > > > > +	 * the PCI Base spec here, esp. 64 bit extraction and memory
> > > > > > +	 * ranges alignment (6.0, 7.5.1.2.1).
> > > > > > +	 */
> > > > > 
> > > > > A request_mem_region() is needed here to ensure ownership and expected
> > > > > sequencing of accessing the RCRB to locate the component registers, and
> > > > > accessing the RCRB to manipulate the component registers. It also helps
> > > > > to sanity check that the BIOS mapped an exclusive range for the RCRB.
> > > > 
> > > > Right, that is missing.
> > > > 
> > > > > 
> > > > > > +	addr = ioremap(rcrb, PCI_BASE_ADDRESS_0 + SZ_8);
> > > > > 
> > > > > That PCI_BASE_ADDRESS_0 does not belong there. It ends up being benign
> > > > > and forcing ioremap to map 12K instead of 8K, but it is a
> > > > > config-register offset, not part of the RCRB size.
> > > > 
> > > > Note this is BAR0 + 8 bytes, not 8k, and it does not map the whole
> > > > RCRB region but instead the first part of the config space up to
> > > > including the 64 bit BAR.
> > > 
> > > Oh, sorry, yes, my mistake. However, there is not much value in mapping
> > > less than 4K since all ioremap requests are rounded up to PAGE_SIZE.
> > > Since an RCRB is only 4K per port lets just map the whole thing.
> > 
> > I was going to keep the ranges small to avoid conflicts with other
> > requests for the same page (though request_mem_region() was missing
> > yet).
> 
> What else will be conflicting the RCRB? Linux has never accessed an RCRB
> in the past as far as I can see. If there is a conflict then we may need
> to move this mapping to the PCI core so that it is managed like other
> mmconf space.

The capabilities (PCIe and DVSEC) could be used by various subsystems
and parts of the driver. I am thinking of the various RAS caps (UP,
DP, CXL, AER variants) that are accessed from different parts of the
driver. Of course, access could be delegated, but otherwise there is the
option to directly map and access those parts. In the component reg
block we already see issues with such broad mappings.

-Robert
  
Dan Williams Nov. 17, 2022, 7:23 p.m. UTC | #7
Robert Richter wrote:
> On 17.11.22 09:20:55, Dan Williams wrote:
> > Robert Richter wrote:
> > > On 15.11.22 09:54:16, Dan Williams wrote:
> > > > Robert Richter wrote:
> > > > > On 14.11.22 13:30:01, Dan Williams wrote:
> > > > > > Robert Richter wrote:
> > > > > 
> > > > > > > diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> > > > > > > index ec178e69b18f..7a5bde81e949 100644
> > > > > > > --- a/drivers/cxl/core/regs.c
> > > > > > > +++ b/drivers/cxl/core/regs.c
> > > > > > > @@ -307,3 +307,49 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
> > > > > > >  	return -ENODEV;
> > > > > > >  }
> > > > > > >  EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
> > > > > > > +
> > > > > > > +resource_size_t cxl_rcrb_to_component(struct device *dev,
> > > > > > > +				      resource_size_t rcrb,
> > > > > > > +				      enum cxl_rcrb which)
> > > > > > > +{
> > > > > > > +	resource_size_t component_reg_phys;
> > > > > > > +	u32 bar0, bar1;
> > > > > > > +	void *addr;
> > > > > > > +
> > > > > > > +	if (which == CXL_RCRB_UPSTREAM)
> > > > > > > +		rcrb += SZ_4K;
> > > > > > > +
> > > > > > > +	/*
> > > > > > > +	 * RCRB's BAR[0..1] point to component block containing CXL
> > > > > > > +	 * subsystem component registers. MEMBAR extraction follows
> > > > > > > +	 * the PCI Base spec here, esp. 64 bit extraction and memory
> > > > > > > +	 * ranges alignment (6.0, 7.5.1.2.1).
> > > > > > > +	 */
> > > > > > 
> > > > > > A request_mem_region() is needed here to ensure ownership and expected
> > > > > > sequencing of accessing the RCRB to locate the component registers, and
> > > > > > accessing the RCRB to manipulate the component registers. It also helps
> > > > > > to sanity check that the BIOS mapped an exclusive range for the RCRB.
> > > > > 
> > > > > Right, that is missing.
> > > > > 
> > > > > > 
> > > > > > > +	addr = ioremap(rcrb, PCI_BASE_ADDRESS_0 + SZ_8);
> > > > > > 
> > > > > > That PCI_BASE_ADDRESS_0 does not belong there. It ends up being benign
> > > > > > and forcing ioremap to map 12K instead of 8K, but it is a
> > > > > > config-register offset, not part of the RCRB size.
> > > > > 
> > > > > Note this is BAR0 + 8 bytes, not 8k, and it does not map the whole
> > > > > RCRB region but instead the first part of the config space up to
> > > > > including the 64 bit BAR.
> > > > 
> > > > Oh, sorry, yes, my mistake. However, there is not much value in mapping
> > > > less than 4K since all ioremap requests are rounded up to PAGE_SIZE.
> > > > Since an RCRB is only 4K per port lets just map the whole thing.
> > > 
> > > I was going to keep the ranges small to avoid conflicts with other
> > > requests for the same page (though request_mem_region() was missing
> > > yet).
> > 
> > What else will be conflicting the RCRB? Linux has never accessed an RCRB
> > in the past as far as I can see. If there is a conflict then we may need
> > to move this mapping to the PCI core so that it is managed like other
> > mmconf space.
> 
> The capabilities (PCIe and DVSEC) could be used by various subsystems
> and parts of the driver. I am thinking of the various RAS caps (UP,
> DP, CXL, AER variants) that are accessed from different parts of the
> driver. Of course, access could be delegated, but otherwise there is the
> option to directly map and access those parts. In the component reg
> block we already see issues with such broad mappings.

Sure, but let's cross that bridge when we get to that point. Something is
broken if these competing usages can not at least have their own page
mapping since that limits being able to hand out control across security
boundaries (like VMs or userspace). Any ioremap less than PAGE_SIZE is
somewhat suspect.

The cxl_port driver so far seems to be sufficient for owning the entire
component register space.
  
Robert Richter Nov. 18, 2022, 8:12 a.m. UTC | #8
On 17.11.22 11:23:16, Dan Williams wrote:
> Robert Richter wrote:
> > On 17.11.22 09:20:55, Dan Williams wrote:
> > > Robert Richter wrote:
> > > > On 15.11.22 09:54:16, Dan Williams wrote:
> > > > > Robert Richter wrote:
> > > > > > On 14.11.22 13:30:01, Dan Williams wrote:

> > > > > Oh, sorry, yes, my mistake. However, there is not much value in mapping
> > > > > less than 4K since all ioremap requests are rounded up to PAGE_SIZE.
> > > > > Since an RCRB is only 4K per port lets just map the whole thing.
> > > > 
> > > > I was going to keep the ranges small to avoid conflicts with other
> > > > requests for the same page (though request_mem_region() was missing
> > > > yet).
> > > 
> > > What else will be conflicting the RCRB? Linux has never accessed an RCRB
> > > in the past as far as I can see. If there is a conflict then we may need
> > > to move this mapping to the PCI core so that it is managed like other
> > > mmconf space.
> > 
> > The capabilities (PCIe and DVSEC) could be used by various subsystems
> > and parts of the driver. I am thinking of the various RAS caps (UP,
> > DP, CXL, AER variants) that are accessed from different parts of the
> > driver. Of course, access could be delegated, but otherwise there is the
> > option to directly map and access those parts. In the component reg
> > block we already see issues with such broad mappings.
> 
> Sure, but lets cross that bridge when we get to that point. Something is
> broken if these competing usages can not at least have their own page
> mapping since that limits being able to hand out control across security
> boundaries (like VMs or userspace). Any ioremap less than PAGE_SIZE is
> somewhat suspect.
> 
> The cxl_port driver so far seems to be sufficient for owning the entire
> component register space.

Ok, I can change that.

Thanks,

-Robert
  

Patch

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 06150c953f58..caea42cf9522 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -9,6 +9,8 @@ 
 #include "cxlpci.h"
 #include "cxl.h"
 
+#define CXL_RCRB_SIZE	SZ_8K
+
 static unsigned long cfmws_to_decoder_flags(int restrictions)
 {
 	unsigned long flags = CXL_DECODER_F_ENABLE;
@@ -240,27 +242,46 @@  static int add_host_bridge_uport(struct device *match, void *arg)
 struct cxl_chbs_context {
 	struct device *dev;
 	unsigned long long uid;
-	resource_size_t chbcr;
+	struct acpi_cedt_chbs chbs;
 };
 
-static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg,
-			 const unsigned long end)
+static int cxl_get_chbs(union acpi_subtable_headers *header, void *arg,
+			const unsigned long end)
 {
 	struct cxl_chbs_context *ctx = arg;
 	struct acpi_cedt_chbs *chbs;
 
-	if (ctx->chbcr)
+	if (ctx->chbs.base)
 		return 0;
 
 	chbs = (struct acpi_cedt_chbs *) header;
 
 	if (ctx->uid != chbs->uid)
 		return 0;
-	ctx->chbcr = chbs->base;
+	ctx->chbs = *chbs;
 
 	return 0;
 }
 
+static resource_size_t cxl_get_chbcr(struct cxl_chbs_context *ctx)
+{
+	struct acpi_cedt_chbs *chbs = &ctx->chbs;
+
+	if (!chbs->base)
+		return CXL_RESOURCE_NONE;
+
+	if (chbs->cxl_version != ACPI_CEDT_CHBS_VERSION_CXL11)
+		return chbs->base;
+
+	if (chbs->length != CXL_RCRB_SIZE)
+		return CXL_RESOURCE_NONE;
+
+	dev_dbg(ctx->dev, "RCRB found for UID %lld: 0x%08llx\n",
+		ctx->uid, (u64)chbs->base);
+
+	return cxl_rcrb_to_component(ctx->dev, chbs->base, CXL_RCRB_DOWNSTREAM);
+}
+
 static int add_host_bridge_dport(struct device *match, void *arg)
 {
 	acpi_status status;
@@ -272,6 +293,7 @@  static int add_host_bridge_dport(struct device *match, void *arg)
 	struct acpi_pci_root *pci_root = to_cxl_pci_root(host, match);
 	struct device *bridge;
 	acpi_handle handle;
+	resource_size_t component_reg_phys;
 
 	if (!pci_root)
 		return 0;
@@ -287,19 +309,20 @@  static int add_host_bridge_dport(struct device *match, void *arg)
 	dev_dbg(match, "UID found: %lld\n", uid);
 
 	ctx = (struct cxl_chbs_context) {
-		.dev = host,
+		.dev = match,
 		.uid = uid,
 	};
-	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbcr, &ctx);
+	acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbs, &ctx);
 
-	if (ctx.chbcr == 0) {
+	component_reg_phys = cxl_get_chbcr(&ctx);
+	if (component_reg_phys == CXL_RESOURCE_NONE) {
 		dev_warn(match, "No CHBS found for Host Bridge (UID %lld)\n", uid);
 		return 0;
 	}
 
-	dev_dbg(match, "CHBCR found: 0x%08llx\n", (u64)ctx.chbcr);
+	dev_dbg(match, "CHBCR found: 0x%08llx\n", (u64)component_reg_phys);
 
-	dport = devm_cxl_add_dport(root_port, bridge, uid, ctx.chbcr);
+	dport = devm_cxl_add_dport(root_port, bridge, uid, component_reg_phys);
 	if (IS_ERR(dport))
 		return PTR_ERR(dport);
 
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index ec178e69b18f..7a5bde81e949 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -307,3 +307,49 @@  int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
 	return -ENODEV;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
+
+resource_size_t cxl_rcrb_to_component(struct device *dev,
+				      resource_size_t rcrb,
+				      enum cxl_rcrb which)
+{
+	resource_size_t component_reg_phys;
+	u32 bar0, bar1;
+	void *addr;
+
+	if (which == CXL_RCRB_UPSTREAM)
+		rcrb += SZ_4K;
+
+	/*
+	 * RCRB's BAR[0..1] point to component block containing CXL
+	 * subsystem component registers. MEMBAR extraction follows
+	 * the PCI Base spec here, esp. 64 bit extraction and memory
+	 * ranges alignment (6.0, 7.5.1.2.1).
+	 */
+	addr = ioremap(rcrb, PCI_BASE_ADDRESS_0 + SZ_8);
+	if (!addr) {
+		dev_err(dev, "Failed to map region %pr\n", addr);
+		return CXL_RESOURCE_NONE;
+	}
+
+	bar0 = readl(addr + PCI_BASE_ADDRESS_0);
+	bar1 = readl(addr + PCI_BASE_ADDRESS_1);
+	iounmap(addr);
+
+	/* sanity check */
+	if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO))
+		return CXL_RESOURCE_NONE;
+
+	component_reg_phys = bar0 & PCI_BASE_ADDRESS_MEM_MASK;
+	if (bar0 & PCI_BASE_ADDRESS_MEM_TYPE_64)
+		component_reg_phys |= ((u64)bar1) << 32;
+
+	if (!component_reg_phys)
+		return CXL_RESOURCE_NONE;
+
+	/* MEMBAR is block size (64k) aligned. */
+	if (!IS_ALIGNED(component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE))
+		return CXL_RESOURCE_NONE;
+
+	return component_reg_phys;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_rcrb_to_component, CXL);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index ac8998b627b5..d6b4fe68a821 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -219,6 +219,14 @@  enum cxl_regloc_type;
 int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
 		      struct cxl_register_map *map);
 
+enum cxl_rcrb {
+	CXL_RCRB_DOWNSTREAM,
+	CXL_RCRB_UPSTREAM,
+};
+resource_size_t cxl_rcrb_to_component(struct device *dev,
+				      resource_size_t rcrb,
+				      enum cxl_rcrb which);
+
 #define CXL_RESOURCE_NONE ((resource_size_t) -1)
 #define CXL_TARGET_STRLEN 20