[v2] crypto: ccp - Allocate TEE ring and cmd buffer using DMA APIs

Message ID 651349f55060767a9a51316c966c1e5daa57a644.1670919979.git.Rijo-john.Thomas@amd.com
State New
Headers
Series [v2] crypto: ccp - Allocate TEE ring and cmd buffer using DMA APIs |

Commit Message

Rijo Thomas Dec. 13, 2022, 11:10 a.m. UTC
  For AMD Secure Processor (ASP) to map and access TEE ring buffer, the
ring buffer address sent by host to ASP must be a real physical
address and the pages must be physically contiguous.

In a virtualized environment though, when the driver is running in a
guest VM, the pages allocated by __get_free_pages() may not be
contiguous in the host (or machine) physical address space. Guests
will see a guest (or pseudo) physical address and not the actual host
(or machine) physical address. The TEE running on ASP cannot decipher
pseudo physical addresses. It needs host or machine physical address.

To resolve this problem, use DMA APIs for allocating buffers that must
be shared with TEE. This will ensure that the pages are contiguous in
host (or machine) address space. If the DMA handle is an IOVA,
translate it into a physical address before sending it to ASP.

This patch also exports two APIs (one for buffer allocation and
another to free the buffer). This API can be used by AMD-TEE driver to
share buffers with TEE.

Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge")
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com>
Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com>
Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com>
Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com>
---
v2:
 * Removed references to dma_buffer.
 * If psp_init() fails, clear reference to master device.
 * Handle gfp flags within psp_tee_alloc_buffer() instead of passing it as
   a function argument.
 * Added comments within psp_tee_alloc_buffer() to serve as future
   documentation.

 drivers/crypto/ccp/psp-dev.c |  13 ++--
 drivers/crypto/ccp/tee-dev.c | 124 +++++++++++++++++++++++------------
 drivers/crypto/ccp/tee-dev.h |   9 +--
 include/linux/psp-tee.h      |  49 ++++++++++++++
 4 files changed, 142 insertions(+), 53 deletions(-)

--
2.25.1
  

Comments

Jeremi Piotrowski Dec. 15, 2022, 1:29 p.m. UTC | #1
On Tue, Dec 13, 2022 at 04:40:27PM +0530, Rijo Thomas wrote:
> For AMD Secure Processor (ASP) to map and access TEE ring buffer, the
> ring buffer address sent by host to ASP must be a real physical
> address and the pages must be physically contiguous.
> 
> In a virtualized environment though, when the driver is running in a
> guest VM, the pages allocated by __get_free_pages() may not be
> contiguous in the host (or machine) physical address space. Guests
> will see a guest (or pseudo) physical address and not the actual host
> (or machine) physical address. The TEE running on ASP cannot decipher
> pseudo physical addresses. It needs host or machine physical address.
> 
> To resolve this problem, use DMA APIs for allocating buffers that must
> be shared with TEE. This will ensure that the pages are contiguous in
> host (or machine) address space. If the DMA handle is an IOVA,
> translate it into a physical address before sending it to ASP.
> 
> This patch also exports two APIs (one for buffer allocation and
> another to free the buffer). This API can be used by AMD-TEE driver to
> share buffers with TEE.
> 
> Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge")
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com>
> Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com>
> Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com>
> Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com>
> ---
> v2:
>  * Removed references to dma_buffer.
>  * If psp_init() fails, clear reference to master device.
>  * Handle gfp flags within psp_tee_alloc_buffer() instead of passing it as
>    a function argument.
>  * Added comments within psp_tee_alloc_buffer() to serve as future
>    documentation.
> 
>  drivers/crypto/ccp/psp-dev.c |  13 ++--
>  drivers/crypto/ccp/tee-dev.c | 124 +++++++++++++++++++++++------------
>  drivers/crypto/ccp/tee-dev.h |   9 +--
>  include/linux/psp-tee.h      |  49 ++++++++++++++
>  4 files changed, 142 insertions(+), 53 deletions(-)
> 
> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
> index c9c741ac8442..380f5caaa550 100644
> --- a/drivers/crypto/ccp/psp-dev.c
> +++ b/drivers/crypto/ccp/psp-dev.c
> @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp)
>  		goto e_err;
>  	}
> 
> -	ret = psp_init(psp);
> -	if (ret)
> -		goto e_irq;
> -
>  	if (sp->set_psp_master_device)
>  		sp->set_psp_master_device(sp);
> 
> +	ret = psp_init(psp);
> +	if (ret)
> +		goto e_clear;
> +
>  	/* Enable interrupt */
>  	iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);
> 
> @@ -175,7 +175,10 @@ int psp_dev_init(struct sp_device *sp)
> 
>  	return 0;
> 
> -e_irq:
> +e_clear:
> +	if (sp->clear_psp_master_device)
> +		sp->clear_psp_master_device(sp);
> +
>  	sp_free_psp_irq(psp->sp, psp);
>  e_err:
>  	sp->psp_data = NULL;
> diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
> index 5c9d47f3be37..5c43e6e166f1 100644
> --- a/drivers/crypto/ccp/tee-dev.c
> +++ b/drivers/crypto/ccp/tee-dev.c
> @@ -12,8 +12,9 @@
>  #include <linux/mutex.h>
>  #include <linux/delay.h>
>  #include <linux/slab.h>
> +#include <linux/dma-direct.h>
> +#include <linux/iommu.h>
>  #include <linux/gfp.h>
> -#include <linux/psp-sev.h>
>  #include <linux/psp-tee.h>
> 
>  #include "psp-dev.h"
> @@ -21,25 +22,73 @@
> 
>  static bool psp_dead;
> 
> +struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size)
> +{
> +	struct psp_device *psp = psp_get_master_device();
> +	struct psp_tee_buffer *buf;
> +	struct iommu_domain *dom;
> +
> +	if (!psp || !size)
> +		return NULL;
> +
> +	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
> +	if (!buf)
> +		return NULL;
> +
> +	/* The pages allocated for PSP Trusted OS must be physically
> +	 * contiguous in host (or machine) address space. Therefore,
> +	 * use DMA API to allocate memory.
> +	 */
> +
> +	buf->vaddr = dma_alloc_coherent(psp->dev, size, &buf->dma,
> +					GFP_KERNEL | __GFP_ZERO);

dma_alloc_coherent memory is just as contiguous as __get_free_pages, and
calling dma_alloc_coherent from a guest does not guarantee that the memory is
contiguous in host memory either. The memory would look contiguous from the
device point of view thanks to the IOMMU though (in both cases). So this is not
about being contiguous but other properties that you might rely on (dma mask
most likely, or coherent if you're not running this on x86?).

Can you confirm why this fixes things and update the comment to reflect that.

> +	if (!buf->vaddr || !buf->dma) {
> +		kfree(buf);
> +		return NULL;
> +	}
> +
> +	buf->size = size;
> +
> +	/* Check whether IOMMU is present. If present, convert IOVA to
> +	 * physical address. In the absence of IOMMU, the DMA address
> +	 * is actually the physical address.
> +	 */
> +
> +	dom = iommu_get_domain_for_dev(psp->dev);
> +	if (dom)
> +		buf->paddr = iommu_iova_to_phys(dom, buf->dma);
> +	else
> +		buf->paddr = buf->dma;

This is confusing: you're storing GPA for the guest and HPA in case of the
host, to pass to the device. Let's talk about the host case.

a) the device is behind an IOMMU. The DMA API gives you an IOVA, and the device
should be using the IOVA to access memory (because it's behind an IOMMU).
b) the device is not behind an IOMMU. The DMA API gives you a PA, the device
uses a PA.

But in case a) you're extracting the PA, which means your device can bypass the
IOMMU, in which case the system should not think that it is behind an IOMMU. So
how does this work?

Jeremi

> +
> +	return buf;
> +}
> +EXPORT_SYMBOL(psp_tee_alloc_buffer);
> +
  
Alex Deucher Dec. 16, 2022, 5:48 p.m. UTC | #2
On Fri, Dec 16, 2022 at 3:20 AM Jeremi Piotrowski
<jpiotrowski@linux.microsoft.com> wrote:
>
> On Tue, Dec 13, 2022 at 04:40:27PM +0530, Rijo Thomas wrote:
> > For AMD Secure Processor (ASP) to map and access TEE ring buffer, the
> > ring buffer address sent by host to ASP must be a real physical
> > address and the pages must be physically contiguous.
> >
> > In a virtualized environment though, when the driver is running in a
> > guest VM, the pages allocated by __get_free_pages() may not be
> > contiguous in the host (or machine) physical address space. Guests
> > will see a guest (or pseudo) physical address and not the actual host
> > (or machine) physical address. The TEE running on ASP cannot decipher
> > pseudo physical addresses. It needs host or machine physical address.
> >
> > To resolve this problem, use DMA APIs for allocating buffers that must
> > be shared with TEE. This will ensure that the pages are contiguous in
> > host (or machine) address space. If the DMA handle is an IOVA,
> > translate it into a physical address before sending it to ASP.
> >
> > This patch also exports two APIs (one for buffer allocation and
> > another to free the buffer). This API can be used by AMD-TEE driver to
> > share buffers with TEE.
> >
> > Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge")
> > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > Cc: stable@vger.kernel.org
> > Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com>
> > Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com>
> > Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com>
> > Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com>
> > ---
> > v2:
> >  * Removed references to dma_buffer.
> >  * If psp_init() fails, clear reference to master device.
> >  * Handle gfp flags within psp_tee_alloc_buffer() instead of passing it as
> >    a function argument.
> >  * Added comments within psp_tee_alloc_buffer() to serve as future
> >    documentation.
> >
> >  drivers/crypto/ccp/psp-dev.c |  13 ++--
> >  drivers/crypto/ccp/tee-dev.c | 124 +++++++++++++++++++++++------------
> >  drivers/crypto/ccp/tee-dev.h |   9 +--
> >  include/linux/psp-tee.h      |  49 ++++++++++++++
> >  4 files changed, 142 insertions(+), 53 deletions(-)
> >
> > diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
> > index c9c741ac8442..380f5caaa550 100644
> > --- a/drivers/crypto/ccp/psp-dev.c
> > +++ b/drivers/crypto/ccp/psp-dev.c
> > @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp)
> >               goto e_err;
> >       }
> >
> > -     ret = psp_init(psp);
> > -     if (ret)
> > -             goto e_irq;
> > -
> >       if (sp->set_psp_master_device)
> >               sp->set_psp_master_device(sp);
> >
> > +     ret = psp_init(psp);
> > +     if (ret)
> > +             goto e_clear;
> > +
> >       /* Enable interrupt */
> >       iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);
> >
> > @@ -175,7 +175,10 @@ int psp_dev_init(struct sp_device *sp)
> >
> >       return 0;
> >
> > -e_irq:
> > +e_clear:
> > +     if (sp->clear_psp_master_device)
> > +             sp->clear_psp_master_device(sp);
> > +
> >       sp_free_psp_irq(psp->sp, psp);
> >  e_err:
> >       sp->psp_data = NULL;
> > diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
> > index 5c9d47f3be37..5c43e6e166f1 100644
> > --- a/drivers/crypto/ccp/tee-dev.c
> > +++ b/drivers/crypto/ccp/tee-dev.c
> > @@ -12,8 +12,9 @@
> >  #include <linux/mutex.h>
> >  #include <linux/delay.h>
> >  #include <linux/slab.h>
> > +#include <linux/dma-direct.h>
> > +#include <linux/iommu.h>
> >  #include <linux/gfp.h>
> > -#include <linux/psp-sev.h>
> >  #include <linux/psp-tee.h>
> >
> >  #include "psp-dev.h"
> > @@ -21,25 +22,73 @@
> >
> >  static bool psp_dead;
> >
> > +struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size)
> > +{
> > +     struct psp_device *psp = psp_get_master_device();
> > +     struct psp_tee_buffer *buf;
> > +     struct iommu_domain *dom;
> > +
> > +     if (!psp || !size)
> > +             return NULL;
> > +
> > +     buf = kzalloc(sizeof(*buf), GFP_KERNEL);
> > +     if (!buf)
> > +             return NULL;
> > +
> > +     /* The pages allocated for PSP Trusted OS must be physically
> > +      * contiguous in host (or machine) address space. Therefore,
> > +      * use DMA API to allocate memory.
> > +      */
> > +
> > +     buf->vaddr = dma_alloc_coherent(psp->dev, size, &buf->dma,
> > +                                     GFP_KERNEL | __GFP_ZERO);
>
> dma_alloc_coherent memory is just as contiguous as __get_free_pages, and
> calling dma_alloc_coherent from a guest does not guarantee that the memory is
> contiguous in host memory either. The memory would look contiguous from the
> device point of view thanks to the IOMMU though (in both cases). So this is not
> about being contiguous but other properties that you might rely on (dma mask
> most likely, or coherent if you're not running this on x86?).
>
> Can you confirm why this fixes things and update the comment to reflect that.
>
> > +     if (!buf->vaddr || !buf->dma) {
> > +             kfree(buf);
> > +             return NULL;
> > +     }
> > +
> > +     buf->size = size;
> > +
> > +     /* Check whether IOMMU is present. If present, convert IOVA to
> > +      * physical address. In the absence of IOMMU, the DMA address
> > +      * is actually the physical address.
> > +      */
> > +
> > +     dom = iommu_get_domain_for_dev(psp->dev);
> > +     if (dom)
> > +             buf->paddr = iommu_iova_to_phys(dom, buf->dma);
> > +     else
> > +             buf->paddr = buf->dma;
>
> This is confusing: you're storing GPA for the guest and HPA in case of the
> host, to pass to the device. Let's talk about the host case.
>
> a) the device is behind an IOMMU. The DMA API gives you an IOVA, and the device
> should be using the IOVA to access memory (because it's behind an IOMMU).
> b) the device is not behind an IOMMU. The DMA API gives you a PA, the device
> uses a PA.
>
> But in case a) you're extracting the PA, which means your device can bypass the
> IOMMU, in which case the system should not think that it is behind an IOMMU. So
> how does this work?

IIRC, as per the AMD IOMMU spec[1], there is an ACPI IVRS table which
details the devices on the platform and whether or not they
participate with IOMMU.  This should carry through to the DMA API.  If
the ACPI tables do not reflect this correctly we should add a quirk to
the AMD IOMMU to properly handle the device.

Alex

[1] - https://www.amd.com/en/support/tech-docs/amd-io-virtualization-technology-iommu-specification

Alex

>
> Jeremi
>
> > +
> > +     return buf;
> > +}
> > +EXPORT_SYMBOL(psp_tee_alloc_buffer);
> > +
>
  
Rijo Thomas Dec. 23, 2022, 12:15 p.m. UTC | #3
On 12/15/2022 6:59 PM, Jeremi Piotrowski wrote:
> On Tue, Dec 13, 2022 at 04:40:27PM +0530, Rijo Thomas wrote:
>> For AMD Secure Processor (ASP) to map and access TEE ring buffer, the
>> ring buffer address sent by host to ASP must be a real physical
>> address and the pages must be physically contiguous.
>>
>> In a virtualized environment though, when the driver is running in a
>> guest VM, the pages allocated by __get_free_pages() may not be
>> contiguous in the host (or machine) physical address space. Guests
>> will see a guest (or pseudo) physical address and not the actual host
>> (or machine) physical address. The TEE running on ASP cannot decipher
>> pseudo physical addresses. It needs host or machine physical address.
>>
>> To resolve this problem, use DMA APIs for allocating buffers that must
>> be shared with TEE. This will ensure that the pages are contiguous in
>> host (or machine) address space. If the DMA handle is an IOVA,
>> translate it into a physical address before sending it to ASP.
>>
>> This patch also exports two APIs (one for buffer allocation and
>> another to free the buffer). This API can be used by AMD-TEE driver to
>> share buffers with TEE.
>>
>> Fixes: 33960acccfbd ("crypto: ccp - add TEE support for Raven Ridge")
>> Cc: Tom Lendacky <thomas.lendacky@amd.com>
>> Cc: stable@vger.kernel.org
>> Signed-off-by: Rijo Thomas <Rijo-john.Thomas@amd.com>
>> Co-developed-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com>
>> Signed-off-by: Jeshwanth <JESHWANTHKUMAR.NK@amd.com>
>> Reviewed-by: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com>
>> ---
>> v2:
>>  * Removed references to dma_buffer.
>>  * If psp_init() fails, clear reference to master device.
>>  * Handle gfp flags within psp_tee_alloc_buffer() instead of passing it as
>>    a function argument.
>>  * Added comments within psp_tee_alloc_buffer() to serve as future
>>    documentation.
>>
>>  drivers/crypto/ccp/psp-dev.c |  13 ++--
>>  drivers/crypto/ccp/tee-dev.c | 124 +++++++++++++++++++++++------------
>>  drivers/crypto/ccp/tee-dev.h |   9 +--
>>  include/linux/psp-tee.h      |  49 ++++++++++++++
>>  4 files changed, 142 insertions(+), 53 deletions(-)
>>
>> diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
>> index c9c741ac8442..380f5caaa550 100644
>> --- a/drivers/crypto/ccp/psp-dev.c
>> +++ b/drivers/crypto/ccp/psp-dev.c
>> @@ -161,13 +161,13 @@ int psp_dev_init(struct sp_device *sp)
>>  		goto e_err;
>>  	}
>>
>> -	ret = psp_init(psp);
>> -	if (ret)
>> -		goto e_irq;
>> -
>>  	if (sp->set_psp_master_device)
>>  		sp->set_psp_master_device(sp);
>>
>> +	ret = psp_init(psp);
>> +	if (ret)
>> +		goto e_clear;
>> +
>>  	/* Enable interrupt */
>>  	iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);
>>
>> @@ -175,7 +175,10 @@ int psp_dev_init(struct sp_device *sp)
>>
>>  	return 0;
>>
>> -e_irq:
>> +e_clear:
>> +	if (sp->clear_psp_master_device)
>> +		sp->clear_psp_master_device(sp);
>> +
>>  	sp_free_psp_irq(psp->sp, psp);
>>  e_err:
>>  	sp->psp_data = NULL;
>> diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
>> index 5c9d47f3be37..5c43e6e166f1 100644
>> --- a/drivers/crypto/ccp/tee-dev.c
>> +++ b/drivers/crypto/ccp/tee-dev.c
>> @@ -12,8 +12,9 @@
>>  #include <linux/mutex.h>
>>  #include <linux/delay.h>
>>  #include <linux/slab.h>
>> +#include <linux/dma-direct.h>
>> +#include <linux/iommu.h>
>>  #include <linux/gfp.h>
>> -#include <linux/psp-sev.h>
>>  #include <linux/psp-tee.h>
>>
>>  #include "psp-dev.h"
>> @@ -21,25 +22,73 @@
>>
>>  static bool psp_dead;
>>
>> +struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size)
>> +{
>> +	struct psp_device *psp = psp_get_master_device();
>> +	struct psp_tee_buffer *buf;
>> +	struct iommu_domain *dom;
>> +
>> +	if (!psp || !size)
>> +		return NULL;
>> +
>> +	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
>> +	if (!buf)
>> +		return NULL;
>> +
>> +	/* The pages allocated for PSP Trusted OS must be physically
>> +	 * contiguous in host (or machine) address space. Therefore,
>> +	 * use DMA API to allocate memory.
>> +	 */
>> +
>> +	buf->vaddr = dma_alloc_coherent(psp->dev, size, &buf->dma,
>> +					GFP_KERNEL | __GFP_ZERO);
> 
> dma_alloc_coherent memory is just as contiguous as __get_free_pages, and
> calling dma_alloc_coherent from a guest does not guarantee that the memory is
> contiguous in host memory either. The memory would look contiguous from the
> device point of view thanks to the IOMMU though (in both cases). So this is not
> about being contiguous but other properties that you might rely on (dma mask
> most likely, or coherent if you're not running this on x86?).
> 
> Can you confirm why this fixes things and update the comment to reflect that.
> 

I see what you are saying.

We verified this in Xen Dom0 PV guest, where dma_alloc_coherent() returned a memory
that is contiguous in machine address space, and the machine address was returned
in the dma handle (buf->dma).

>> +	if (!buf->vaddr || !buf->dma) {
>> +		kfree(buf);
>> +		return NULL;
>> +	}
>> +
>> +	buf->size = size;
>> +
>> +	/* Check whether IOMMU is present. If present, convert IOVA to
>> +	 * physical address. In the absence of IOMMU, the DMA address
>> +	 * is actually the physical address.
>> +	 */
>> +
>> +	dom = iommu_get_domain_for_dev(psp->dev);
>> +	if (dom)
>> +		buf->paddr = iommu_iova_to_phys(dom, buf->dma);
>> +	else
>> +		buf->paddr = buf->dma;
> 
> This is confusing: you're storing GPA for the guest and HPA in case of the
> host, to pass to the device. Let's talk about the host case.
> 
> a) the device is behind an IOMMU. The DMA API gives you an IOVA, and the device
> should be using the IOVA to access memory (because it's behind an IOMMU).
> b) the device is not behind an IOMMU. The DMA API gives you a PA, the device
> uses a PA.
> 
> But in case a) you're extracting the PA, which means your device can bypass the
> IOMMU, in which case the system should not think that it is behind an IOMMU. So
> how does this work?
> 

PSP Trusted OS maps memory without using IOMMU (it bypasses IOMMU). Hence, we need to pass system
physical address to PSP. That's why we cannot pass IOVA or GPA to PSP.

We are re-evaluating our solution to handle other scenarios as well (not just Xen Dom0 PV).

Thanks,
Rijo

> Jeremi
> 
>> +
>> +	return buf;
>> +}
>> +EXPORT_SYMBOL(psp_tee_alloc_buffer);
>> +
>
  
Herbert Xu Dec. 30, 2022, 8:24 a.m. UTC | #4
On Fri, Dec 23, 2022 at 05:45:24PM +0530, Rijo Thomas wrote:
>
> > dma_alloc_coherent memory is just as contiguous as __get_free_pages, and
> > calling dma_alloc_coherent from a guest does not guarantee that the memory is
> > contiguous in host memory either. The memory would look contiguous from the
> > device point of view thanks to the IOMMU though (in both cases). So this is not
> > about being contiguous but other properties that you might rely on (dma mask
> > most likely, or coherent if you're not running this on x86?).
> > 
> > Can you confirm why this fixes things and update the comment to reflect that.
> 
> I see what you are saying.
> 
> We verified this in Xen Dom0 PV guest, where dma_alloc_coherent() returned a memory
> that is contiguous in machine address space, and the machine address was returned
> in the dma handle (buf->dma).

So is this even relevant to the mainstream kernel or is this patch
only needed for Xen Dom0?

Thanks,
  
Rijo Thomas Jan. 4, 2023, 6 a.m. UTC | #5
On 12/30/2022 1:54 PM, Herbert Xu wrote:
> On Fri, Dec 23, 2022 at 05:45:24PM +0530, Rijo Thomas wrote:
>>
>>> dma_alloc_coherent memory is just as contiguous as __get_free_pages, and
>>> calling dma_alloc_coherent from a guest does not guarantee that the memory is
>>> contiguous in host memory either. The memory would look contiguous from the
>>> device point of view thanks to the IOMMU though (in both cases). So this is not
>>> about being contiguous but other properties that you might rely on (dma mask
>>> most likely, or coherent if you're not running this on x86?).
>>>
>>> Can you confirm why this fixes things and update the comment to reflect that.
>>
>> I see what you are saying.
>>
>> We verified this in Xen Dom0 PV guest, where dma_alloc_coherent() returned a memory
>> that is contiguous in machine address space, and the machine address was returned
>> in the dma handle (buf->dma).
> 
> So is this even relevant to the mainstream kernel or is this patch
> only needed for Xen Dom0?
> 
> Thanks,

Herbert,

This patch is no longer relevant to the mainstream kernel.

Thanks,
Rijo
  

Patch

diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
index c9c741ac8442..380f5caaa550 100644
--- a/drivers/crypto/ccp/psp-dev.c
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -161,13 +161,13 @@  int psp_dev_init(struct sp_device *sp)
 		goto e_err;
 	}

-	ret = psp_init(psp);
-	if (ret)
-		goto e_irq;
-
 	if (sp->set_psp_master_device)
 		sp->set_psp_master_device(sp);

+	ret = psp_init(psp);
+	if (ret)
+		goto e_clear;
+
 	/* Enable interrupt */
 	iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);

@@ -175,7 +175,10 @@  int psp_dev_init(struct sp_device *sp)

 	return 0;

-e_irq:
+e_clear:
+	if (sp->clear_psp_master_device)
+		sp->clear_psp_master_device(sp);
+
 	sp_free_psp_irq(psp->sp, psp);
 e_err:
 	sp->psp_data = NULL;
diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c
index 5c9d47f3be37..5c43e6e166f1 100644
--- a/drivers/crypto/ccp/tee-dev.c
+++ b/drivers/crypto/ccp/tee-dev.c
@@ -12,8 +12,9 @@ 
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/dma-direct.h>
+#include <linux/iommu.h>
 #include <linux/gfp.h>
-#include <linux/psp-sev.h>
 #include <linux/psp-tee.h>

 #include "psp-dev.h"
@@ -21,25 +22,73 @@ 

 static bool psp_dead;

+struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size)
+{
+	struct psp_device *psp = psp_get_master_device();
+	struct psp_tee_buffer *buf;
+	struct iommu_domain *dom;
+
+	if (!psp || !size)
+		return NULL;
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	/* The pages allocated for PSP Trusted OS must be physically
+	 * contiguous in host (or machine) address space. Therefore,
+	 * use DMA API to allocate memory.
+	 */
+
+	buf->vaddr = dma_alloc_coherent(psp->dev, size, &buf->dma,
+					GFP_KERNEL | __GFP_ZERO);
+	if (!buf->vaddr || !buf->dma) {
+		kfree(buf);
+		return NULL;
+	}
+
+	buf->size = size;
+
+	/* Check whether IOMMU is present. If present, convert IOVA to
+	 * physical address. In the absence of IOMMU, the DMA address
+	 * is actually the physical address.
+	 */
+
+	dom = iommu_get_domain_for_dev(psp->dev);
+	if (dom)
+		buf->paddr = iommu_iova_to_phys(dom, buf->dma);
+	else
+		buf->paddr = buf->dma;
+
+	return buf;
+}
+EXPORT_SYMBOL(psp_tee_alloc_buffer);
+
+void psp_tee_free_buffer(struct psp_tee_buffer *buf)
+{
+	struct psp_device *psp = psp_get_master_device();
+
+	if (!psp || !buf)
+		return;
+
+	dma_free_coherent(psp->dev, buf->size, buf->vaddr, buf->dma);
+
+	kfree(buf);
+}
+EXPORT_SYMBOL(psp_tee_free_buffer);
+
 static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size)
 {
 	struct ring_buf_manager *rb_mgr = &tee->rb_mgr;
-	void *start_addr;

 	if (!ring_size)
 		return -EINVAL;

-	/* We need actual physical address instead of DMA address, since
-	 * Trusted OS running on AMD Secure Processor will map this region
-	 */
-	start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size));
-	if (!start_addr)
+	rb_mgr->ring_buf = psp_tee_alloc_buffer(ring_size);
+	if (!rb_mgr->ring_buf) {
+		dev_err(tee->dev, "ring allocation failed\n");
 		return -ENOMEM;
-
-	memset(start_addr, 0x0, ring_size);
-	rb_mgr->ring_start = start_addr;
-	rb_mgr->ring_size = ring_size;
-	rb_mgr->ring_pa = __psp_pa(start_addr);
+	}
 	mutex_init(&rb_mgr->mutex);

 	return 0;
@@ -49,15 +98,8 @@  static void tee_free_ring(struct psp_tee_device *tee)
 {
 	struct ring_buf_manager *rb_mgr = &tee->rb_mgr;

-	if (!rb_mgr->ring_start)
-		return;
-
-	free_pages((unsigned long)rb_mgr->ring_start,
-		   get_order(rb_mgr->ring_size));
+	psp_tee_free_buffer(rb_mgr->ring_buf);

-	rb_mgr->ring_start = NULL;
-	rb_mgr->ring_size = 0;
-	rb_mgr->ring_pa = 0;
 	mutex_destroy(&rb_mgr->mutex);
 }

@@ -81,35 +123,35 @@  static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout,
 	return -ETIMEDOUT;
 }

-static
-struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
+static struct psp_tee_buffer *tee_alloc_cmd_buffer(struct psp_tee_device *tee)
 {
+	struct psp_tee_buffer *cmd_buffer;
 	struct tee_init_ring_cmd *cmd;

-	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
-	if (!cmd)
+	cmd_buffer = psp_tee_alloc_buffer(sizeof(*cmd));
+	if (!cmd_buffer)
 		return NULL;

-	cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa);
-	cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa);
-	cmd->size = tee->rb_mgr.ring_size;
+	cmd = (struct tee_init_ring_cmd *)cmd_buffer->vaddr;
+	cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_buf->paddr);
+	cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_buf->paddr);
+	cmd->size = tee->rb_mgr.ring_buf->size;

 	dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n",
 		cmd->hi_addr, cmd->low_addr, cmd->size);

-	return cmd;
+	return cmd_buffer;
 }

-static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd)
+static inline void tee_free_cmd_buffer(struct psp_tee_buffer *cmd_buffer)
 {
-	kfree(cmd);
+	psp_tee_free_buffer(cmd_buffer);
 }

 static int tee_init_ring(struct psp_tee_device *tee)
 {
 	int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd);
-	struct tee_init_ring_cmd *cmd;
-	phys_addr_t cmd_buffer;
+	struct psp_tee_buffer *cmd_buffer;
 	unsigned int reg;
 	int ret;

@@ -123,21 +165,19 @@  static int tee_init_ring(struct psp_tee_device *tee)

 	tee->rb_mgr.wptr = 0;

-	cmd = tee_alloc_cmd_buffer(tee);
-	if (!cmd) {
+	cmd_buffer = tee_alloc_cmd_buffer(tee);
+	if (!cmd_buffer) {
 		tee_free_ring(tee);
 		return -ENOMEM;
 	}

-	cmd_buffer = __psp_pa((void *)cmd);
-
 	/* Send command buffer details to Trusted OS by writing to
 	 * CPU-PSP message registers
 	 */

-	iowrite32(lower_32_bits(cmd_buffer),
+	iowrite32(lower_32_bits(cmd_buffer->paddr),
 		  tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg);
-	iowrite32(upper_32_bits(cmd_buffer),
+	iowrite32(upper_32_bits(cmd_buffer->paddr),
 		  tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg);
 	iowrite32(TEE_RING_INIT_CMD,
 		  tee->io_regs + tee->vdata->cmdresp_reg);
@@ -157,7 +197,7 @@  static int tee_init_ring(struct psp_tee_device *tee)
 	}

 free_buf:
-	tee_free_cmd_buffer(cmd);
+	tee_free_cmd_buffer(cmd_buffer);

 	return ret;
 }
@@ -167,7 +207,7 @@  static void tee_destroy_ring(struct psp_tee_device *tee)
 	unsigned int reg;
 	int ret;

-	if (!tee->rb_mgr.ring_start)
+	if (!tee->rb_mgr.ring_buf->vaddr)
 		return;

 	if (psp_dead)
@@ -256,7 +296,7 @@  static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,
 	do {
 		/* Get pointer to ring buffer command entry */
 		cmd = (struct tee_ring_cmd *)
-			(tee->rb_mgr.ring_start + tee->rb_mgr.wptr);
+			(tee->rb_mgr.ring_buf->vaddr + tee->rb_mgr.wptr);

 		rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg);

@@ -305,7 +345,7 @@  static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id,

 	/* Update local copy of write pointer */
 	tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd);
-	if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size)
+	if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_buf->size)
 		tee->rb_mgr.wptr = 0;

 	/* Trigger interrupt to Trusted OS */
diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h
index 49d26158b71e..ee22d60177a2 100644
--- a/drivers/crypto/ccp/tee-dev.h
+++ b/drivers/crypto/ccp/tee-dev.h
@@ -16,6 +16,7 @@ 

 #include <linux/device.h>
 #include <linux/mutex.h>
+#include <linux/psp-tee.h>

 #define TEE_DEFAULT_TIMEOUT		10
 #define MAX_BUFFER_SIZE			988
@@ -48,16 +49,12 @@  struct tee_init_ring_cmd {

 /**
  * struct ring_buf_manager - Helper structure to manage ring buffer.
- * @ring_start:  starting address of ring buffer
- * @ring_size:   size of ring buffer in bytes
- * @ring_pa:     physical address of ring buffer
+ * @ring_buf:    ring buffer allocated for sharing with TEE
  * @wptr:        index to the last written entry in ring buffer
  */
 struct ring_buf_manager {
 	struct mutex mutex;	/* synchronizes access to ring buffer */
-	void *ring_start;
-	u32 ring_size;
-	phys_addr_t ring_pa;
+	struct psp_tee_buffer *ring_buf;
 	u32 wptr;
 };

diff --git a/include/linux/psp-tee.h b/include/linux/psp-tee.h
index cb0c95d6d76b..670bb7877456 100644
--- a/include/linux/psp-tee.h
+++ b/include/linux/psp-tee.h
@@ -13,6 +13,7 @@ 

 #include <linux/types.h>
 #include <linux/errno.h>
+#include <linux/dma-mapping.h>

 /* This file defines the Trusted Execution Environment (TEE) interface commands
  * and the API exported by AMD Secure Processor driver to communicate with
@@ -40,6 +41,22 @@  enum tee_cmd_id {
 	TEE_CMD_ID_UNMAP_SHARED_MEM,
 };

+/**
+ * struct psp_tee_buffer - Structure that has buffer details of memory that can
+ * be shared with PSP TEE.
+ *
+ * @dma:    DMA address of memory
+ * @paddr:  Physical address of memory
+ * @vaddr:  CPU virtual address of memory
+ * @size:   Size of memory in bytes
+ */
+struct psp_tee_buffer {
+	dma_addr_t dma;
+	phys_addr_t paddr;
+	void *vaddr;
+	unsigned long size;
+};
+
 #ifdef CONFIG_CRYPTO_DEV_SP_PSP
 /**
  * psp_tee_process_cmd() - Process command in Trusted Execution Environment
@@ -75,6 +92,28 @@  int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len,
  */
 int psp_check_tee_status(void);

+/**
+ * psp_tee_alloc_buffer() - Allocates memory of requested size using
+ * dma_alloc_coherent() API.
+ *
+ * This function can be used to allocate a shared memory region between the
+ * host and PSP TEE.
+ *
+ * Returns:
+ * non-NULL   a valid pointer to struct psp_tee_buffer
+ * NULL       on failure
+ */
+struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size);
+
+/**
+ * psp_tee_free_buffer() - Deallocates memory using dma_free_coherent() API.
+ *
+ * This function can be used to release shared memory region between host
+ * and PSP TEE.
+ *
+ */
+void psp_tee_free_buffer(struct psp_tee_buffer *buffer);
+
 #else /* !CONFIG_CRYPTO_DEV_SP_PSP */

 static inline int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf,
@@ -87,5 +126,15 @@  static inline int psp_check_tee_status(void)
 {
 	return -ENODEV;
 }
+
+static inline
+struct psp_tee_buffer *psp_tee_alloc_buffer(unsigned long size)
+{
+	return NULL;
+}
+
+static inline void psp_tee_free_buffer(struct psp_tee_buffer *buffer)
+{
+}
 #endif /* CONFIG_CRYPTO_DEV_SP_PSP */
 #endif /* __PSP_TEE_H_ */