diff mbox series

[V2] virt: Prevent IV reuse in SNP guest driver

Message ID	20221021173328.2489411-1-pgonda@google.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; Date: Fri, 21 Oct 2022 10:33:28 -0700 Message-Id: <20221021173328.2489411-1-pgonda@google.com> Mime-Version: 1.0 Subject: [PATCH V2] virt: Prevent IV reuse in SNP guest driver From: Peter Gonda <pgonda@google.com> To: thomas.lendacky@amd.com Cc: Peter Gonda <pgonda@google.com>, Dionna Glaze <dionnaglaze@google.com>, Borislav Petkov <bp@suse.de>, Michael Roth <michael.roth@amd.com>, Haowen Bai <baihaowen@meizu.com>, Yang Yingliang <yangyingliang@huawei.com>, Marc Orr <marcorr@google.com>, David Rientjes <rientjes@google.com>, Ashish Kalra <Ashish.Kalra@amd.com>, linux-kernel@vger.kernel.org, kvm@vger.kernel.org Content-Type: text/plain; charset="UTF-8" Precedence: bulk
Series	[V2] virt: Prevent IV reuse in SNP guest driver \| [V2] virt: Prevent IV reuse in SNP guest driver

Commit Message

Peter Gonda Oct. 21, 2022, 5:33 p.m. UTC

  The ASP and an SNP guest use a series of AES-GCM keys called VMPCKs to
communicate securely with each other. The IV to this scheme is a
sequence number that both the ASP and the guest track. Currently this
sequence number in a guest request must exactly match the sequence
number tracked by the ASP. This means that if the guest sees an error
from the host during a request it can only retry that exact request or
disable the VMPCK to prevent an IV reuse. AES-GCM cannot tolerate IV
reuse see:
https://csrc.nist.gov/csrc/media/projects/block-cipher-techniques/documents/bcm/comments/800-38-series-drafts/gcm/joux_comments.pdf

To handle userspace querying the cert_data length. Instead of requesting
the cert length from userspace use the size of the drivers allocated
shared buffer. Then copy that buffer to userspace, or give userspace an
error depending on the size of the buffer given by userspace.

Fixes: fce96cf044308 ("virt: Add SEV-SNP guest driver")
Signed-off-by: Peter Gonda <pgonda@google.com>
Reported-by: Peter Gonda <pgonda@google.com>
Reviewed-by: Dionna Glaze <dionnaglaze@google.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Michael Roth <michael.roth@amd.com>
Cc: Haowen Bai <baihaowen@meizu.com>
Cc: Yang Yingliang <yangyingliang@huawei.com>
Cc: Marc Orr <marcorr@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ashish Kalra <Ashish.Kalra@amd.com>
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
 drivers/virt/coco/sev-guest/sev-guest.c | 93 ++++++++++++++++---------
 1 file changed, 62 insertions(+), 31 deletions(-)

Comments

Tom Lendacky Oct. 21, 2022, 7:01 p.m. UTC | #1

On 10/21/22 12:33, Peter Gonda wrote:
> The ASP and an SNP guest use a series of AES-GCM keys called VMPCKs to
> communicate securely with each other. The IV to this scheme is a
> sequence number that both the ASP and the guest track. Currently this
> sequence number in a guest request must exactly match the sequence
> number tracked by the ASP. This means that if the guest sees an error
> from the host during a request it can only retry that exact request or
> disable the VMPCK to prevent an IV reuse. AES-GCM cannot tolerate IV
> reuse see:
> https://csrc.nist.gov/csrc/media/projects/block-cipher-techniques/documents/bcm/comments/800-38-series-drafts/gcm/joux_comments.pdf
> 
> To handle userspace querying the cert_data length. Instead of requesting
> the cert length from userspace use the size of the drivers allocated
> shared buffer. Then copy that buffer to userspace, or give userspace an
> error depending on the size of the buffer given by userspace.
> 
> Fixes: fce96cf044308 ("virt: Add SEV-SNP guest driver")
> Signed-off-by: Peter Gonda <pgonda@google.com>
> Reported-by: Peter Gonda <pgonda@google.com>
> Reviewed-by: Dionna Glaze <dionnaglaze@google.com>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> Cc: Michael Roth <michael.roth@amd.com>
> Cc: Haowen Bai <baihaowen@meizu.com>
> Cc: Yang Yingliang <yangyingliang@huawei.com>
> Cc: Marc Orr <marcorr@google.com>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Ashish Kalra <Ashish.Kalra@amd.com>
> Cc: linux-kernel@vger.kernel.org
> Cc: kvm@vger.kernel.org
> ---
>   drivers/virt/coco/sev-guest/sev-guest.c | 93 ++++++++++++++++---------
>   1 file changed, 62 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
> index f422f9c58ba7..8c54ea84bc57 100644
> --- a/drivers/virt/coco/sev-guest/sev-guest.c
> +++ b/drivers/virt/coco/sev-guest/sev-guest.c
> @@ -41,7 +41,7 @@ struct snp_guest_dev {
>   	struct device *dev;
>   	struct miscdevice misc;
>   
> -	void *certs_data;
> +	u8 (*certs_data)[SEV_FW_BLOB_MAX_SIZE];
>   	struct snp_guest_crypto *crypto;
>   	struct snp_guest_msg *request, *response;
>   	struct snp_secrets_page_layout *layout;
> @@ -67,8 +67,27 @@ static bool is_vmpck_empty(struct snp_guest_dev *snp_dev)
>   	return true;
>   }
>   
> +/*
> + * If we receive an error from the host or ASP we have two options. We can
> + * either retry the exact same encrypted request or we can discontinue using the
> + * VMPCK.
> + *
> + * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
> + * encrypt the requests. The IV for this scheme is the sequence number. GCM
> + * cannot tolerate IV reuse.
> + *
> + * The ASP FW v1.51 only increments the sequence numbers on a successful
> + * guest<->ASP back and forth and only accepts messages at its exact sequence
> + * number.
> + *
> + * So if we were to reuse the sequence number the encryption scheme is
> + * vulnerable. If we encrypt the sequence number for a fresh IV the ASP will
> + * reject our request.
> + */
>   static void snp_disable_vmpck(struct snp_guest_dev *snp_dev)
>   {
> +	dev_alert(snp_dev->dev, "Disabling vmpck_id: %d to prevent IV reuse.\n",
> +		  vmpck_id);
>   	memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN);
>   	snp_dev->vmpck = NULL;
>   }
> @@ -326,29 +345,29 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
>   	if (fw_err)
>   		*fw_err = err;
>   
> -	if (rc)
> -		return rc;
> +	if (rc) {
> +		dev_alert(snp_dev->dev,
> +			  "Detected error from ASP request. rc: %d, fw_err: %llu\n",
> +			  rc, *fw_err);
> +		goto disable_vmpck;
> +	}
>   
> -	/*
> -	 * The verify_and_dec_payload() will fail only if the hypervisor is
> -	 * actively modifying the message header or corrupting the encrypted payload.
> -	 * This hints that hypervisor is acting in a bad faith. Disable the VMPCK so that
> -	 * the key cannot be used for any communication. The key is disabled to ensure
> -	 * that AES-GCM does not use the same IV while encrypting the request payload.
> -	 */
>   	rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz);
>   	if (rc) {
>   		dev_alert(snp_dev->dev,
> -			  "Detected unexpected decode failure, disabling the vmpck_id %d\n",
> -			  vmpck_id);
> -		snp_disable_vmpck(snp_dev);
> -		return rc;
> +			  "Detected unexpected decode failure from ASP. rc: %d\n",
> +			  rc);
> +		goto disable_vmpck;
>   	}
>   
>   	/* Increment to new message sequence after payload decryption was successful. */
>   	snp_inc_msg_seqno(snp_dev);
>   
>   	return 0;
> +
> +disable_vmpck:
> +	snp_disable_vmpck(snp_dev);
> +	return rc;
>   }
>   
>   static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
> @@ -437,7 +456,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
>   	struct snp_guest_crypto *crypto = snp_dev->crypto;
>   	struct snp_ext_report_req req;
>   	struct snp_report_resp *resp;
> -	int ret, npages = 0, resp_len;
> +	int ret, resp_len, req_cert_len, resp_cert_len;
>   
>   	lockdep_assert_held(&snp_cmd_mutex);
>   
> @@ -448,14 +467,15 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
>   		return -EFAULT;
>   
>   	/* userspace does not want certificate data */
> -	if (!req.certs_len || !req.certs_address)
> +	req_cert_len = req.certs_len;
> +	if (!req_cert_len || !req.certs_address)
>   		goto cmd;
>   
> -	if (req.certs_len > SEV_FW_BLOB_MAX_SIZE ||
> -	    !IS_ALIGNED(req.certs_len, PAGE_SIZE))
> +	if (req_cert_len > sizeof(*snp_dev->certs_data) ||
> +	    !IS_ALIGNED(req_cert_len, PAGE_SIZE))
>   		return -EINVAL;
>   
> -	if (!access_ok((const void __user *)req.certs_address, req.certs_len))
> +	if (!access_ok((const void __user *)req.certs_address, req_cert_len))
>   		return -EFAULT;
>   
>   	/*
> @@ -464,8 +484,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
>   	 * the host. If host does not supply any certs in it, then copy
>   	 * zeros to indicate that certificate data was not provided.
>   	 */
> -	memset(snp_dev->certs_data, 0, req.certs_len);
> -	npages = req.certs_len >> PAGE_SHIFT;
> +	memset(snp_dev->certs_data, 0, sizeof(*snp_dev->certs_data));
>   cmd:
>   	/*
>   	 * The intermediate response buffer is used while decrypting the
> @@ -477,25 +496,37 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
>   	if (!resp)
>   		return -ENOMEM;
>   
> -	snp_dev->input.data_npages = npages;
> +	snp_dev->input.data_npages = sizeof(*snp_dev->certs_data) >> PAGE_SHIFT;
>   	ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
>   				   SNP_MSG_REPORT_REQ, &req.data,
>   				   sizeof(req.data), resp->data, resp_len, &arg->fw_err);
>   
> +	resp_cert_len = snp_dev->input.data_npages << PAGE_SHIFT;

The hypervisor is not required to update the number of pages that the 
certificates actually used/required if enough pages were supplied. So this 
value could always remain as 4 (based on SEV_FW_BLOB_MAX_SIZE) on 
successful return.

And if that's the case, we could always just return 4 for the number of 
pages no matter what. Otherwise you'll have to update the logic here if 
you want to obtain the actual number.

Thanks,
Tom

> +
>   	/* If certs length is invalid then copy the returned length */
>   	if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) {
> -		req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT;
> +		dev_alert(snp_dev->dev,
> +			  "Certificate data from host: %d, Max size allocated by driver: %lu.\n",
> +			  resp_cert_len, sizeof(*snp_dev->certs_data));
> +		ret = -EFAULT;
> +	}
> +
> +	if (ret)
> +		goto e_free;
> +
> +	/* Pass the actual certificate data size back to userspace */
> +	req.certs_len = resp_cert_len;
> +	if (resp_cert_len > req_cert_len) {
> +		arg->fw_err = SNP_GUEST_REQ_INVALID_LEN;
>   
>   		if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req)))
>   			ret = -EFAULT;
> -	}
>   
> -	if (ret)
>   		goto e_free;
> +	}
>   
> -	if (npages &&
> -	    copy_to_user((void __user *)req.certs_address, snp_dev->certs_data,
> -			 req.certs_len)) {
> +	if (copy_to_user((void __user *)req.certs_address, snp_dev->certs_data,
> +			 resp_cert_len)) {
>   		ret = -EFAULT;
>   		goto e_free;
>   	}
> @@ -676,7 +707,7 @@ static int __init sev_guest_probe(struct platform_device *pdev)
>   	if (!snp_dev->response)
>   		goto e_free_request;
>   
> -	snp_dev->certs_data = alloc_shared_pages(dev, SEV_FW_BLOB_MAX_SIZE);
> +	snp_dev->certs_data = alloc_shared_pages(dev, sizeof(*snp_dev->certs_data));
>   	if (!snp_dev->certs_data)
>   		goto e_free_response;
>   
> @@ -703,7 +734,7 @@ static int __init sev_guest_probe(struct platform_device *pdev)
>   	return 0;
>   
>   e_free_cert_data:
> -	free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
> +	free_shared_pages(snp_dev->certs_data, sizeof(*snp_dev->certs_data));
>   e_free_response:
>   	free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
>   e_free_request:
> @@ -717,7 +748,7 @@ static int __exit sev_guest_remove(struct platform_device *pdev)
>   {
>   	struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev);
>   
> -	free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
> +	free_shared_pages(snp_dev->certs_data, sizeof(*snp_dev->certs_data));
>   	free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
>   	free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
>   	deinit_crypto(snp_dev->crypto);

Peter Gonda Oct. 21, 2022, 8:57 p.m. UTC | #2

On Fri, Oct 21, 2022 at 1:02 PM Tom Lendacky <thomas.lendacky@amd.com> wrote:
>
> On 10/21/22 12:33, Peter Gonda wrote:
> > The ASP and an SNP guest use a series of AES-GCM keys called VMPCKs to
> > communicate securely with each other. The IV to this scheme is a
> > sequence number that both the ASP and the guest track. Currently this
> > sequence number in a guest request must exactly match the sequence
> > number tracked by the ASP. This means that if the guest sees an error
> > from the host during a request it can only retry that exact request or
> > disable the VMPCK to prevent an IV reuse. AES-GCM cannot tolerate IV
> > reuse see:
> > https://csrc.nist.gov/csrc/media/projects/block-cipher-techniques/documents/bcm/comments/800-38-series-drafts/gcm/joux_comments.pdf
> >
> > To handle userspace querying the cert_data length. Instead of requesting
> > the cert length from userspace use the size of the drivers allocated
> > shared buffer. Then copy that buffer to userspace, or give userspace an
> > error depending on the size of the buffer given by userspace.
> >
> > Fixes: fce96cf044308 ("virt: Add SEV-SNP guest driver")
> > Signed-off-by: Peter Gonda <pgonda@google.com>
> > Reported-by: Peter Gonda <pgonda@google.com>
> > Reviewed-by: Dionna Glaze <dionnaglaze@google.com>
> > Cc: Borislav Petkov <bp@suse.de>
> > Cc: Tom Lendacky <thomas.lendacky@amd.com>
> > Cc: Michael Roth <michael.roth@amd.com>
> > Cc: Haowen Bai <baihaowen@meizu.com>
> > Cc: Yang Yingliang <yangyingliang@huawei.com>
> > Cc: Marc Orr <marcorr@google.com>
> > Cc: David Rientjes <rientjes@google.com>
> > Cc: Ashish Kalra <Ashish.Kalra@amd.com>
> > Cc: linux-kernel@vger.kernel.org
> > Cc: kvm@vger.kernel.org
> > ---
> >   drivers/virt/coco/sev-guest/sev-guest.c | 93 ++++++++++++++++---------
> >   1 file changed, 62 insertions(+), 31 deletions(-)
> >
> > diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
> > index f422f9c58ba7..8c54ea84bc57 100644
> > --- a/drivers/virt/coco/sev-guest/sev-guest.c
> > +++ b/drivers/virt/coco/sev-guest/sev-guest.c
> > @@ -41,7 +41,7 @@ struct snp_guest_dev {
> >       struct device *dev;
> >       struct miscdevice misc;
> >
> > -     void *certs_data;
> > +     u8 (*certs_data)[SEV_FW_BLOB_MAX_SIZE];
> >       struct snp_guest_crypto *crypto;
> >       struct snp_guest_msg *request, *response;
> >       struct snp_secrets_page_layout *layout;
> > @@ -67,8 +67,27 @@ static bool is_vmpck_empty(struct snp_guest_dev *snp_dev)
> >       return true;
> >   }
> >
> > +/*
> > + * If we receive an error from the host or ASP we have two options. We can
> > + * either retry the exact same encrypted request or we can discontinue using the
> > + * VMPCK.
> > + *
> > + * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
> > + * encrypt the requests. The IV for this scheme is the sequence number. GCM
> > + * cannot tolerate IV reuse.
> > + *
> > + * The ASP FW v1.51 only increments the sequence numbers on a successful
> > + * guest<->ASP back and forth and only accepts messages at its exact sequence
> > + * number.
> > + *
> > + * So if we were to reuse the sequence number the encryption scheme is
> > + * vulnerable. If we encrypt the sequence number for a fresh IV the ASP will
> > + * reject our request.
> > + */
> >   static void snp_disable_vmpck(struct snp_guest_dev *snp_dev)
> >   {
> > +     dev_alert(snp_dev->dev, "Disabling vmpck_id: %d to prevent IV reuse.\n",
> > +               vmpck_id);
> >       memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN);
> >       snp_dev->vmpck = NULL;
> >   }
> > @@ -326,29 +345,29 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
> >       if (fw_err)
> >               *fw_err = err;
> >
> > -     if (rc)
> > -             return rc;
> > +     if (rc) {
> > +             dev_alert(snp_dev->dev,
> > +                       "Detected error from ASP request. rc: %d, fw_err: %llu\n",
> > +                       rc, *fw_err);
> > +             goto disable_vmpck;
> > +     }
> >
> > -     /*
> > -      * The verify_and_dec_payload() will fail only if the hypervisor is
> > -      * actively modifying the message header or corrupting the encrypted payload.
> > -      * This hints that hypervisor is acting in a bad faith. Disable the VMPCK so that
> > -      * the key cannot be used for any communication. The key is disabled to ensure
> > -      * that AES-GCM does not use the same IV while encrypting the request payload.
> > -      */
> >       rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz);
> >       if (rc) {
> >               dev_alert(snp_dev->dev,
> > -                       "Detected unexpected decode failure, disabling the vmpck_id %d\n",
> > -                       vmpck_id);
> > -             snp_disable_vmpck(snp_dev);
> > -             return rc;
> > +                       "Detected unexpected decode failure from ASP. rc: %d\n",
> > +                       rc);
> > +             goto disable_vmpck;
> >       }
> >
> >       /* Increment to new message sequence after payload decryption was successful. */
> >       snp_inc_msg_seqno(snp_dev);
> >
> >       return 0;
> > +
> > +disable_vmpck:
> > +     snp_disable_vmpck(snp_dev);
> > +     return rc;
> >   }
> >
> >   static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
> > @@ -437,7 +456,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> >       struct snp_guest_crypto *crypto = snp_dev->crypto;
> >       struct snp_ext_report_req req;
> >       struct snp_report_resp *resp;
> > -     int ret, npages = 0, resp_len;
> > +     int ret, resp_len, req_cert_len, resp_cert_len;
> >
> >       lockdep_assert_held(&snp_cmd_mutex);
> >
> > @@ -448,14 +467,15 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> >               return -EFAULT;
> >
> >       /* userspace does not want certificate data */
> > -     if (!req.certs_len || !req.certs_address)
> > +     req_cert_len = req.certs_len;
> > +     if (!req_cert_len || !req.certs_address)
> >               goto cmd;
> >
> > -     if (req.certs_len > SEV_FW_BLOB_MAX_SIZE ||
> > -         !IS_ALIGNED(req.certs_len, PAGE_SIZE))
> > +     if (req_cert_len > sizeof(*snp_dev->certs_data) ||
> > +         !IS_ALIGNED(req_cert_len, PAGE_SIZE))
> >               return -EINVAL;
> >
> > -     if (!access_ok((const void __user *)req.certs_address, req.certs_len))
> > +     if (!access_ok((const void __user *)req.certs_address, req_cert_len))
> >               return -EFAULT;
> >
> >       /*
> > @@ -464,8 +484,7 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> >        * the host. If host does not supply any certs in it, then copy
> >        * zeros to indicate that certificate data was not provided.
> >        */
> > -     memset(snp_dev->certs_data, 0, req.certs_len);
> > -     npages = req.certs_len >> PAGE_SHIFT;
> > +     memset(snp_dev->certs_data, 0, sizeof(*snp_dev->certs_data));
> >   cmd:
> >       /*
> >        * The intermediate response buffer is used while decrypting the
> > @@ -477,25 +496,37 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> >       if (!resp)
> >               return -ENOMEM;
> >
> > -     snp_dev->input.data_npages = npages;
> > +     snp_dev->input.data_npages = sizeof(*snp_dev->certs_data) >> PAGE_SHIFT;
> >       ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
> >                                  SNP_MSG_REPORT_REQ, &req.data,
> >                                  sizeof(req.data), resp->data, resp_len, &arg->fw_err);
> >
> > +     resp_cert_len = snp_dev->input.data_npages << PAGE_SHIFT;
>
> The hypervisor is not required to update the number of pages that the
> certificates actually used/required if enough pages were supplied. So this
> value could always remain as 4 (based on SEV_FW_BLOB_MAX_SIZE) on
> successful return.
>
> And if that's the case, we could always just return 4 for the number of
> pages no matter what. Otherwise you'll have to update the logic here if
> you want to obtain the actual number.

Are you asking for this to just hard code the userspace requirement to
4 pages? We could leave this as written here, that would leave the
guest open to a new GHCB spec where

"State from Hypervisor: on error will contain the number of guest
contiguous pages required to hold the data to be returned"

Is instead:

"State from Hypervisor: contain the number of guest contiguous pages
required to hold the data to be returned"

I think this would be a non-breaking change since the spec says
nothing of the non-error case currently. Fine with your preference
here. Either Dionna or I can follow up with a series to allow more
than 4pages if needed.

The logic required would be parsing the GUID table? I think we'd
rather keep that out of the kernel driver, right?

>
> Thanks,
> Tom
>
> > +
> >       /* If certs length is invalid then copy the returned length */
> >       if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) {
> > -             req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT;
> > +             dev_alert(snp_dev->dev,
> > +                       "Certificate data from host: %d, Max size allocated by driver: %lu.\n",
> > +                       resp_cert_len, sizeof(*snp_dev->certs_data));
> > +             ret = -EFAULT;
> > +     }
> > +
> > +     if (ret)
> > +             goto e_free;
> > +
> > +     /* Pass the actual certificate data size back to userspace */
> > +     req.certs_len = resp_cert_len;
> > +     if (resp_cert_len > req_cert_len) {
> > +             arg->fw_err = SNP_GUEST_REQ_INVALID_LEN;
> >
> >               if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req)))
> >                       ret = -EFAULT;
> > -     }
> >
> > -     if (ret)
> >               goto e_free;
> > +     }
> >
> > -     if (npages &&
> > -         copy_to_user((void __user *)req.certs_address, snp_dev->certs_data,
> > -                      req.certs_len)) {
> > +     if (copy_to_user((void __user *)req.certs_address, snp_dev->certs_data,
> > +                      resp_cert_len)) {
> >               ret = -EFAULT;
> >               goto e_free;
> >       }
> > @@ -676,7 +707,7 @@ static int __init sev_guest_probe(struct platform_device *pdev)
> >       if (!snp_dev->response)
> >               goto e_free_request;
> >
> > -     snp_dev->certs_data = alloc_shared_pages(dev, SEV_FW_BLOB_MAX_SIZE);
> > +     snp_dev->certs_data = alloc_shared_pages(dev, sizeof(*snp_dev->certs_data));
> >       if (!snp_dev->certs_data)
> >               goto e_free_response;
> >
> > @@ -703,7 +734,7 @@ static int __init sev_guest_probe(struct platform_device *pdev)
> >       return 0;
> >
> >   e_free_cert_data:
> > -     free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
> > +     free_shared_pages(snp_dev->certs_data, sizeof(*snp_dev->certs_data));
> >   e_free_response:
> >       free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
> >   e_free_request:
> > @@ -717,7 +748,7 @@ static int __exit sev_guest_remove(struct platform_device *pdev)
> >   {
> >       struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev);
> >
> > -     free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
> > +     free_shared_pages(snp_dev->certs_data, sizeof(*snp_dev->certs_data));
> >       free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
> >       free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
> >       deinit_crypto(snp_dev->crypto);

Tom Lendacky Oct. 21, 2022, 9:27 p.m. UTC | #3

On 10/21/22 15:57, Peter Gonda wrote:
> On Fri, Oct 21, 2022 at 1:02 PM Tom Lendacky <thomas.lendacky@amd.com> wrote:
>> On 10/21/22 12:33, Peter Gonda wrote:
>>> The ASP and an SNP guest use a series of AES-GCM keys called VMPCKs to
>>> communicate securely with each other. The IV to this scheme is a
>>> sequence number that both the ASP and the guest track. Currently this
>>> sequence number in a guest request must exactly match the sequence
>>> number tracked by the ASP. This means that if the guest sees an error
>>> from the host during a request it can only retry that exact request or
>>> disable the VMPCK to prevent an IV reuse. AES-GCM cannot tolerate IV
>>> reuse see:
>>> https://csrc.nist.gov/csrc/media/projects/block-cipher-techniques/documents/bcm/comments/800-38-series-drafts/gcm/joux_comments.pdf
>>>
>>> To handle userspace querying the cert_data length. Instead of requesting
>>> the cert length from userspace use the size of the drivers allocated
>>> shared buffer. Then copy that buffer to userspace, or give userspace an
>>> error depending on the size of the buffer given by userspace.
>>>
>>> Fixes: fce96cf044308 ("virt: Add SEV-SNP guest driver")
>>> Signed-off-by: Peter Gonda <pgonda@google.com>
>>> Reported-by: Peter Gonda <pgonda@google.com>
>>> Reviewed-by: Dionna Glaze <dionnaglaze@google.com>
>>> Cc: Borislav Petkov <bp@suse.de>
>>> Cc: Tom Lendacky <thomas.lendacky@amd.com>
>>> Cc: Michael Roth <michael.roth@amd.com>
>>> Cc: Haowen Bai <baihaowen@meizu.com>
>>> Cc: Yang Yingliang <yangyingliang@huawei.com>
>>> Cc: Marc Orr <marcorr@google.com>
>>> Cc: David Rientjes <rientjes@google.com>
>>> Cc: Ashish Kalra <Ashish.Kalra@amd.com>
>>> Cc: linux-kernel@vger.kernel.org
>>> Cc: kvm@vger.kernel.org
>>> ---

>>> @@ -477,25 +496,37 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
>>>        if (!resp)
>>>                return -ENOMEM;
>>>
>>> -     snp_dev->input.data_npages = npages;
>>> +     snp_dev->input.data_npages = sizeof(*snp_dev->certs_data) >> PAGE_SHIFT;
>>>        ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
>>>                                   SNP_MSG_REPORT_REQ, &req.data,
>>>                                   sizeof(req.data), resp->data, resp_len, &arg->fw_err);
>>>
>>> +     resp_cert_len = snp_dev->input.data_npages << PAGE_SHIFT;
>>
>> The hypervisor is not required to update the number of pages that the
>> certificates actually used/required if enough pages were supplied. So this
>> value could always remain as 4 (based on SEV_FW_BLOB_MAX_SIZE) on
>> successful return.
>>
>> And if that's the case, we could always just return 4 for the number of
>> pages no matter what. Otherwise you'll have to update the logic here if
>> you want to obtain the actual number.
> 
> Are you asking for this to just hard code the userspace requirement to
> 4 pages? We could leave this as written here, that would leave the
> guest open to a new GHCB spec where

It's up to you. Ideally, if userspace provides a npages value of 0, then 
the driver issues the request with 0 and gets back the actual value. Then, 
to ensure the sequence numbers are updated, you issue the request again 
with the either the just returned value or SEV_FW_BLOB_MAX_SIZE >> 
PAGE_SHIFT. That will update the sequence numbers and the driver returns 
the number of pages required as returned from the first request.

That number can also be cached and then whenever userspace calls down with 
npages of 0, immediately return the cached value. If the request ever gets 
a SNP_GUEST_REQ_INVALID_LEN with the cached value, the newly returned 
value gets cached and the driver performs the request again, like the very 
first time in order to update the sequence numbers. The driver would then 
return the new npages value back to userspace. Of course that becomes the 
"minimum" number of pages now, so even if the hypervisor reduces the size 
of the certs data, it always requires more than needed.

> 
> "State from Hypervisor: on error will contain the number of guest
> contiguous pages required to hold the data to be returned"
> 
> Is instead:
> 
> "State from Hypervisor: contain the number of guest contiguous pages
> required to hold the data to be returned"

If the driver always returns 4, I don't see this as requiring any change 
to the spec. It may be more than is truly needed, but that doesn't matter, 
the cert data will fit, it just may be more than necessary. This can occur 
even if you pass back the actual number, if, in between the call with 0, 
the hypervisor updates the certs such that less pages are required.

> 
> I think this would be a non-breaking change since the spec says
> nothing of the non-error case currently. Fine with your preference
> here. Either Dionna or I can follow up with a series to allow more
> than 4pages if needed.

I'd prefer that userspace get the actual number, but really, I don't think 
it's a big deal to just return 4 until the driver can handle a more 
dynamic approach should more than 4 pages ever be needed (this would also 
require support on the hypervisor where currently not more than 4 pages 
are allowed to be provided, too).

I just wanted you to be aware that in testing you're likely to see 4 
always returned to userspace.

> 
> The logic required would be parsing the GUID table? I think we'd
> rather keep that out of the kernel driver, right?

No, that's not the logic I'm thinking of. I'm just thinking of using the 
userspace specified npages and acting accordingly.

Thanks,
Tom

>

Peter Gonda Oct. 27, 2022, 3:06 p.m. UTC | #4

On Fri, Oct 21, 2022 at 3:27 PM Tom Lendacky <thomas.lendacky@amd.com> wrote:
>
> On 10/21/22 15:57, Peter Gonda wrote:
> > On Fri, Oct 21, 2022 at 1:02 PM Tom Lendacky <thomas.lendacky@amd.com> wrote:
> >> On 10/21/22 12:33, Peter Gonda wrote:
> >>> The ASP and an SNP guest use a series of AES-GCM keys called VMPCKs to
> >>> communicate securely with each other. The IV to this scheme is a
> >>> sequence number that both the ASP and the guest track. Currently this
> >>> sequence number in a guest request must exactly match the sequence
> >>> number tracked by the ASP. This means that if the guest sees an error
> >>> from the host during a request it can only retry that exact request or
> >>> disable the VMPCK to prevent an IV reuse. AES-GCM cannot tolerate IV
> >>> reuse see:
> >>> https://csrc.nist.gov/csrc/media/projects/block-cipher-techniques/documents/bcm/comments/800-38-series-drafts/gcm/joux_comments.pdf
> >>>
> >>> To handle userspace querying the cert_data length. Instead of requesting
> >>> the cert length from userspace use the size of the drivers allocated
> >>> shared buffer. Then copy that buffer to userspace, or give userspace an
> >>> error depending on the size of the buffer given by userspace.
> >>>
> >>> Fixes: fce96cf044308 ("virt: Add SEV-SNP guest driver")
> >>> Signed-off-by: Peter Gonda <pgonda@google.com>
> >>> Reported-by: Peter Gonda <pgonda@google.com>
> >>> Reviewed-by: Dionna Glaze <dionnaglaze@google.com>
> >>> Cc: Borislav Petkov <bp@suse.de>
> >>> Cc: Tom Lendacky <thomas.lendacky@amd.com>
> >>> Cc: Michael Roth <michael.roth@amd.com>
> >>> Cc: Haowen Bai <baihaowen@meizu.com>
> >>> Cc: Yang Yingliang <yangyingliang@huawei.com>
> >>> Cc: Marc Orr <marcorr@google.com>
> >>> Cc: David Rientjes <rientjes@google.com>
> >>> Cc: Ashish Kalra <Ashish.Kalra@amd.com>
> >>> Cc: linux-kernel@vger.kernel.org
> >>> Cc: kvm@vger.kernel.org
> >>> ---
>
> >>> @@ -477,25 +496,37 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
> >>>        if (!resp)
> >>>                return -ENOMEM;
> >>>
> >>> -     snp_dev->input.data_npages = npages;
> >>> +     snp_dev->input.data_npages = sizeof(*snp_dev->certs_data) >> PAGE_SHIFT;
> >>>        ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
> >>>                                   SNP_MSG_REPORT_REQ, &req.data,
> >>>                                   sizeof(req.data), resp->data, resp_len, &arg->fw_err);
> >>>
> >>> +     resp_cert_len = snp_dev->input.data_npages << PAGE_SHIFT;
> >>
> >> The hypervisor is not required to update the number of pages that the
> >> certificates actually used/required if enough pages were supplied. So this
> >> value could always remain as 4 (based on SEV_FW_BLOB_MAX_SIZE) on
> >> successful return.
> >>
> >> And if that's the case, we could always just return 4 for the number of
> >> pages no matter what. Otherwise you'll have to update the logic here if
> >> you want to obtain the actual number.
> >
> > Are you asking for this to just hard code the userspace requirement to
> > 4 pages? We could leave this as written here, that would leave the
> > guest open to a new GHCB spec where
>
> It's up to you. Ideally, if userspace provides a npages value of 0, then
> the driver issues the request with 0 and gets back the actual value. Then,
> to ensure the sequence numbers are updated, you issue the request again
> with the either the just returned value or SEV_FW_BLOB_MAX_SIZE >>
> PAGE_SHIFT. That will update the sequence numbers and the driver returns
> the number of pages required as returned from the first request.
>
> That number can also be cached and then whenever userspace calls down with
> npages of 0, immediately return the cached value. If the request ever gets
> a SNP_GUEST_REQ_INVALID_LEN with the cached value, the newly returned
> value gets cached and the driver performs the request again, like the very
> first time in order to update the sequence numbers. The driver would then
> return the new npages value back to userspace. Of course that becomes the
> "minimum" number of pages now, so even if the hypervisor reduces the size
> of the certs data, it always requires more than needed.
>
> >
> > "State from Hypervisor: on error will contain the number of guest
> > contiguous pages required to hold the data to be returned"
> >
> > Is instead:
> >
> > "State from Hypervisor: contain the number of guest contiguous pages
> > required to hold the data to be returned"
>
> If the driver always returns 4, I don't see this as requiring any change
> to the spec. It may be more than is truly needed, but that doesn't matter,
> the cert data will fit, it just may be more than necessary. This can occur
> even if you pass back the actual number, if, in between the call with 0,
> the hypervisor updates the certs such that less pages are required.
>
> >
> > I think this would be a non-breaking change since the spec says
> > nothing of the non-error case currently. Fine with your preference
> > here. Either Dionna or I can follow up with a series to allow more
> > than 4pages if needed.
>
> I'd prefer that userspace get the actual number, but really, I don't think
> it's a big deal to just return 4 until the driver can handle a more
> dynamic approach should more than 4 pages ever be needed (this would also
> require support on the hypervisor where currently not more than 4 pages
> are allowed to be provided, too).
>
> I just wanted you to be aware that in testing you're likely to see 4
> always returned to userspace.

So if you want userspace to get the actual number I think we want the
host to tell us the actual number. Currently userspace only gets a
upper bound since these requests race against host changes:

0. Host updates cert_data to be 10pages
1. Guest requests SNP EXT REQ for len query
2. Host returns 10 pages
3. Host updates cert_data to be 2pages
4. Guest requests SNP EXT REQ with 10page buffer
5. Host returns certs


I have sent a V3 series. I left this patch largely the same but added
a second patch to fix up the extended request retrying.


>
> >
> > The logic required would be parsing the GUID table? I think we'd
> > rather keep that out of the kernel driver, right?
>
> No, that's not the logic I'm thinking of. I'm just thinking of using the
> userspace specified npages and acting accordingly.
>
> Thanks,
> Tom
>
> >

diff mbox series

Patch

diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index f422f9c58ba7..8c54ea84bc57 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -41,7 +41,7 @@  struct snp_guest_dev {
 	struct device *dev;
 	struct miscdevice misc;
 
-	void *certs_data;
+	u8 (*certs_data)[SEV_FW_BLOB_MAX_SIZE];
 	struct snp_guest_crypto *crypto;
 	struct snp_guest_msg *request, *response;
 	struct snp_secrets_page_layout *layout;
@@ -67,8 +67,27 @@  static bool is_vmpck_empty(struct snp_guest_dev *snp_dev)
 	return true;
 }
 
+/*
+ * If we receive an error from the host or ASP we have two options. We can
+ * either retry the exact same encrypted request or we can discontinue using the
+ * VMPCK.
+ *
+ * This is because in the current encryption scheme GHCB v2 uses AES-GCM to
+ * encrypt the requests. The IV for this scheme is the sequence number. GCM
+ * cannot tolerate IV reuse.
+ *
+ * The ASP FW v1.51 only increments the sequence numbers on a successful
+ * guest<->ASP back and forth and only accepts messages at its exact sequence
+ * number.
+ *
+ * So if we were to reuse the sequence number the encryption scheme is
+ * vulnerable. If we encrypt the sequence number for a fresh IV the ASP will
+ * reject our request.
+ */
 static void snp_disable_vmpck(struct snp_guest_dev *snp_dev)
 {
+	dev_alert(snp_dev->dev, "Disabling vmpck_id: %d to prevent IV reuse.\n",
+		  vmpck_id);
 	memzero_explicit(snp_dev->vmpck, VMPCK_KEY_LEN);
 	snp_dev->vmpck = NULL;
 }
@@ -326,29 +345,29 @@  static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 	if (fw_err)
 		*fw_err = err;
 
-	if (rc)
-		return rc;
+	if (rc) {
+		dev_alert(snp_dev->dev,
+			  "Detected error from ASP request. rc: %d, fw_err: %llu\n",
+			  rc, *fw_err);
+		goto disable_vmpck;
+	}
 
-	/*
-	 * The verify_and_dec_payload() will fail only if the hypervisor is
-	 * actively modifying the message header or corrupting the encrypted payload.
-	 * This hints that hypervisor is acting in a bad faith. Disable the VMPCK so that
-	 * the key cannot be used for any communication. The key is disabled to ensure
-	 * that AES-GCM does not use the same IV while encrypting the request payload.
-	 */
 	rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz);
 	if (rc) {
 		dev_alert(snp_dev->dev,
-			  "Detected unexpected decode failure, disabling the vmpck_id %d\n",
-			  vmpck_id);
-		snp_disable_vmpck(snp_dev);
-		return rc;
+			  "Detected unexpected decode failure from ASP. rc: %d\n",
+			  rc);
+		goto disable_vmpck;
 	}
 
 	/* Increment to new message sequence after payload decryption was successful. */
 	snp_inc_msg_seqno(snp_dev);
 
 	return 0;
+
+disable_vmpck:
+	snp_disable_vmpck(snp_dev);
+	return rc;
 }
 
 static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)
@@ -437,7 +456,7 @@  static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
 	struct snp_guest_crypto *crypto = snp_dev->crypto;
 	struct snp_ext_report_req req;
 	struct snp_report_resp *resp;
-	int ret, npages = 0, resp_len;
+	int ret, resp_len, req_cert_len, resp_cert_len;
 
 	lockdep_assert_held(&snp_cmd_mutex);
 
@@ -448,14 +467,15 @@  static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
 		return -EFAULT;
 
 	/* userspace does not want certificate data */
-	if (!req.certs_len || !req.certs_address)
+	req_cert_len = req.certs_len;
+	if (!req_cert_len || !req.certs_address)
 		goto cmd;
 
-	if (req.certs_len > SEV_FW_BLOB_MAX_SIZE ||
-	    !IS_ALIGNED(req.certs_len, PAGE_SIZE))
+	if (req_cert_len > sizeof(*snp_dev->certs_data) ||
+	    !IS_ALIGNED(req_cert_len, PAGE_SIZE))
 		return -EINVAL;
 
-	if (!access_ok((const void __user *)req.certs_address, req.certs_len))
+	if (!access_ok((const void __user *)req.certs_address, req_cert_len))
 		return -EFAULT;
 
 	/*
@@ -464,8 +484,7 @@  static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
 	 * the host. If host does not supply any certs in it, then copy
 	 * zeros to indicate that certificate data was not provided.
 	 */
-	memset(snp_dev->certs_data, 0, req.certs_len);
-	npages = req.certs_len >> PAGE_SHIFT;
+	memset(snp_dev->certs_data, 0, sizeof(*snp_dev->certs_data));
 cmd:
 	/*
 	 * The intermediate response buffer is used while decrypting the
@@ -477,25 +496,37 @@  static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
 	if (!resp)
 		return -ENOMEM;
 
-	snp_dev->input.data_npages = npages;
+	snp_dev->input.data_npages = sizeof(*snp_dev->certs_data) >> PAGE_SHIFT;
 	ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
 				   SNP_MSG_REPORT_REQ, &req.data,
 				   sizeof(req.data), resp->data, resp_len, &arg->fw_err);
 
+	resp_cert_len = snp_dev->input.data_npages << PAGE_SHIFT;
+
 	/* If certs length is invalid then copy the returned length */
 	if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) {
-		req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT;
+		dev_alert(snp_dev->dev,
+			  "Certificate data from host: %d, Max size allocated by driver: %lu.\n",
+			  resp_cert_len, sizeof(*snp_dev->certs_data));
+		ret = -EFAULT;
+	}
+
+	if (ret)
+		goto e_free;
+
+	/* Pass the actual certificate data size back to userspace */
+	req.certs_len = resp_cert_len;
+	if (resp_cert_len > req_cert_len) {
+		arg->fw_err = SNP_GUEST_REQ_INVALID_LEN;
 
 		if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req)))
 			ret = -EFAULT;
-	}
 
-	if (ret)
 		goto e_free;
+	}
 
-	if (npages &&
-	    copy_to_user((void __user *)req.certs_address, snp_dev->certs_data,
-			 req.certs_len)) {
+	if (copy_to_user((void __user *)req.certs_address, snp_dev->certs_data,
+			 resp_cert_len)) {
 		ret = -EFAULT;
 		goto e_free;
 	}
@@ -676,7 +707,7 @@  static int __init sev_guest_probe(struct platform_device *pdev)
 	if (!snp_dev->response)
 		goto e_free_request;
 
-	snp_dev->certs_data = alloc_shared_pages(dev, SEV_FW_BLOB_MAX_SIZE);
+	snp_dev->certs_data = alloc_shared_pages(dev, sizeof(*snp_dev->certs_data));
 	if (!snp_dev->certs_data)
 		goto e_free_response;
 
@@ -703,7 +734,7 @@  static int __init sev_guest_probe(struct platform_device *pdev)
 	return 0;
 
 e_free_cert_data:
-	free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
+	free_shared_pages(snp_dev->certs_data, sizeof(*snp_dev->certs_data));
 e_free_response:
 	free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
 e_free_request:
@@ -717,7 +748,7 @@  static int __exit sev_guest_remove(struct platform_device *pdev)
 {
 	struct snp_guest_dev *snp_dev = platform_get_drvdata(pdev);
 
-	free_shared_pages(snp_dev->certs_data, SEV_FW_BLOB_MAX_SIZE);
+	free_shared_pages(snp_dev->certs_data, sizeof(*snp_dev->certs_data));
 	free_shared_pages(snp_dev->response, sizeof(struct snp_guest_msg));
 	free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
 	deinit_crypto(snp_dev->crypto);