[v4,4/4] virt/coco/sev-guest: interpret VMM errors from guest request

Message ID 20221024224657.2917482-5-dionnaglaze@google.com
State New
Headers
Series Add throttling detection to sev-guest |

Commit Message

Dionna Amalie Glaze Oct. 24, 2022, 10:46 p.m. UTC
  The GHCB specification states that the upper 32 bits of exitinfo2 are
for the VMM's error codes. The sev-guest ABI has already locked in
that the fw_err status of the input will be 64 bits, and that
BIT_ULL(32) means that the extended guest request's data buffer was too
small, so we have to keep that ABI.

We can still interpret the upper 32 bits of exitinfo2 for the user
anyway in case the request gets throttled. For safety, since the
encryption algorithm in GHCBv2 is AES_GCM, we cannot return to user
space without having completed the request with the current sequence
number. If we were to return and the guest were to make another request
but with different message contents, then that would be IV reuse.

When throttled, the driver will reschedule itself and then try
again. The ioctl may block indefinitely, but that has always been the
case when deferring these requests to the host.

Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: Peter Gonda <pgonda@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>

Signed-off-by: Dionna Glaze <dionnaglaze@google.com>
---
 drivers/virt/coco/sev-guest/sev-guest.c | 54 ++++++++++++++++++++++---
 include/uapi/linux/sev-guest.h          | 18 ++++++++-
 2 files changed, 64 insertions(+), 8 deletions(-)
  

Comments

Tom Lendacky Oct. 28, 2022, 3:15 p.m. UTC | #1
On 10/24/22 17:46, Dionna Glaze wrote:
> The GHCB specification states that the upper 32 bits of exitinfo2 are
> for the VMM's error codes. The sev-guest ABI has already locked in
> that the fw_err status of the input will be 64 bits, and that
> BIT_ULL(32) means that the extended guest request's data buffer was too
> small, so we have to keep that ABI.
> 
> We can still interpret the upper 32 bits of exitinfo2 for the user
> anyway in case the request gets throttled. For safety, since the
> encryption algorithm in GHCBv2 is AES_GCM, we cannot return to user
> space without having completed the request with the current sequence
> number. If we were to return and the guest were to make another request
> but with different message contents, then that would be IV reuse.
> 
> When throttled, the driver will reschedule itself and then try
> again. The ioctl may block indefinitely, but that has always been the
> case when deferring these requests to the host.
> 
> Cc: Tom Lendacky <Thomas.Lendacky@amd.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Joerg Roedel <jroedel@suse.de>
> Cc: Peter Gonda <pgonda@google.com>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> 
> Signed-off-by: Dionna Glaze <dionnaglaze@google.com>
> ---
>   drivers/virt/coco/sev-guest/sev-guest.c | 54 ++++++++++++++++++++++---
>   include/uapi/linux/sev-guest.h          | 18 ++++++++-
>   2 files changed, 64 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
> index 0508c2f46f6b..dfde777933b6 100644
> --- a/drivers/virt/coco/sev-guest/sev-guest.c
> +++ b/drivers/virt/coco/sev-guest/sev-guest.c
> @@ -14,6 +14,7 @@
>   #include <linux/io.h>
>   #include <linux/platform_device.h>
>   #include <linux/miscdevice.h>
> +#include <linux/ratelimit.h>
>   #include <linux/set_memory.h>
>   #include <linux/fs.h>
>   #include <crypto/aead.h>
> @@ -48,12 +49,22 @@ struct snp_guest_dev {
>   	struct snp_req_data input;
>   	u32 *os_area_msg_seqno;
>   	u8 *vmpck;
> +
> +	struct ratelimit_state rs;
>   };
>   
>   static u32 vmpck_id;
>   module_param(vmpck_id, uint, 0444);
>   MODULE_PARM_DESC(vmpck_id, "The VMPCK ID to use when communicating with the PSP.");
>   
> +static int rate_hz = 2;

s/int/unsigned int/ to match uint below.

> +module_param(rate_hz, uint, 0444);
> +MODULE_PARM_DESC(vmpck_id, "The rate limit frequency to limit requests to.");
> +
> +static int rate_burst = 1;

s/int/unsigned int/ to match uint below.

> +module_param(rate_burst, uint, 0444);
> +MODULE_PARM_DESC(rate_burst, "The rate limit burst amount to limit requests to.");
> +
>   /* Mutex to serialize the shared buffer access and command handling. */
>   static DEFINE_MUTEX(snp_cmd_mutex);
>   
> @@ -305,9 +316,12 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
>   				u8 type, void *req_buf, size_t req_sz, void *resp_buf,
>   				u32 resp_sz, __u64 *exitinfo2)
>   {
> +	unsigned int vmm_err;
>   	u64 seqno;
>   	int rc;
>   
> +	might_resched();
> +

Not sure this is needed. This may call cond_resched() right away, so I 
don't think this is doing what you think it is (as I only see it used in 
scheduler code and spinlock code). Did you mean to use might_sleep()?

>   	/* Get message sequence and verify that its a non-zero */
>   	seqno = snp_get_msg_seqno(snp_dev);
>   	if (!seqno)
> @@ -320,9 +334,35 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
>   	if (rc)
>   		return rc;
>   
> +retry:
> +	/*
> +	 * Rate limit commands internally since the host can also throttle, and
> +	 * we don't want to create a tight request spin that could end up
> +	 * getting this VM throttled more heavily.
> +	 */
> +	if (!__ratelimit(&snp_dev->rs)) {

This doesn't do any sleeping/delaying, it just returns a 0 or 1, so you
could still cause a tight spin here. I guess that shouldn't be a problem
if nothing else is ready to run. But maybe adding an msleep()/usleep()
here based on the rate limit parameters (half the rate limit?) would be
good so that the CPU isn't pegged while waiting for the ratelimit to be
satisfied?

I'll let others chime in on that and see if it is appropriate.

Thanks,
Tom

> +		cond_resched();
> +		goto retry;
> +	}
>   	/* Call firmware to process the request */
>   	rc = snp_issue_guest_request(exit_code, &snp_dev->input, exitinfo2);
>   
> +	vmm_err = *exitinfo2 >> SNP_GUEST_VMM_ERR_SHIFT;
> +	/*
> +	 * The host may return EBUSY if the request has been throttled.
> +	 * We retry in the driver to avoid returning and reusing the message
> +	 * sequence number on a different message.
> +	 */
> +	if (vmm_err == SNP_GUEST_VMM_ERR_BUSY) {
> +		cond_resched();
> +		goto retry;
> +	}
> +
> +	if (vmm_err && vmm_err != SNP_GUEST_VMM_ERR_INVALID_LEN) {
> +		pr_err("sev-guest: host returned unknown error code: %d\n",
> +		       vmm_err);
> +		return -EINVAL;
> +	}
>   	if (rc)
>   		return rc;
>   
> @@ -375,7 +415,7 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io
>   
>   	rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version,
>   				  SNP_MSG_REPORT_REQ, &req, sizeof(req), resp->data,
> -				  resp_len, &arg->fw_err);
> +				  resp_len, &arg->exitinfo2);
>   	if (rc)
>   		goto e_free;
>   
> @@ -415,7 +455,7 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque
>   
>   	rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version,
>   				  SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len,
> -				  &arg->fw_err);
> +				  &arg->exitinfo2);
>   	if (rc)
>   		return rc;
>   
> @@ -477,10 +517,10 @@ static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
>   	snp_dev->input.data_npages = npages;
>   	ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
>   				   SNP_MSG_REPORT_REQ, &req.data,
> -				   sizeof(req.data), resp->data, resp_len, &arg->fw_err);
> +				   sizeof(req.data), resp->data, resp_len, &arg->exitinfo2);
>   
>   	/* If certs length is invalid then copy the returned length */
> -	if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) {
> +	if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) {
>   		req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT;
>   
>   		if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req)))
> @@ -515,7 +555,7 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long
>   	if (copy_from_user(&input, argp, sizeof(input)))
>   		return -EFAULT;
>   
> -	input.fw_err = 0xff;
> +	input.exitinfo2 = SEV_RET_NO_FW_CALL;
>   
>   	/* Message version must be non-zero */
>   	if (!input.msg_version)
> @@ -546,7 +586,7 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long
>   
>   	mutex_unlock(&snp_cmd_mutex);
>   
> -	if (input.fw_err && copy_to_user(argp, &input, sizeof(input)))
> +	if (input.exitinfo2 && copy_to_user(argp, &input, sizeof(input)))
>   		return -EFAULT;
>   
>   	return ret;
> @@ -696,6 +736,8 @@ static int __init sev_guest_probe(struct platform_device *pdev)
>   	if (ret)
>   		goto e_free_cert_data;
>   
> +	ratelimit_state_init(&snp_dev->rs, rate_hz * HZ, rate_burst);
> +
>   	dev_info(dev, "Initialized SEV guest driver (using vmpck_id %d)\n", vmpck_id);
>   	return 0;
>   
> diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
> index 256aaeff7e65..8e4144aa78c9 100644
> --- a/include/uapi/linux/sev-guest.h
> +++ b/include/uapi/linux/sev-guest.h
> @@ -52,8 +52,15 @@ struct snp_guest_request_ioctl {
>   	__u64 req_data;
>   	__u64 resp_data;
>   
> -	/* firmware error code on failure (see psp-sev.h) */
> -	__u64 fw_err;
> +	/* bits[63:32]: VMM error code, bits[31:0] firmware error code (see psp-sev.h) */
> +	union {
> +		__u64 exitinfo2;
> +		__u64 fw_err; /* Name deprecated in favor of others */
> +		struct {
> +			__u32 fw_error;
> +			__u32 vmm_error;
> +		};
> +	};
>   };
>   
>   struct snp_ext_report_req {
> @@ -77,4 +84,11 @@ struct snp_ext_report_req {
>   /* Get SNP extended report as defined in the GHCB specification version 2. */
>   #define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl)
>   
> +/* Guest message request EXIT_INFO_2 constants */
> +#define SNP_GUEST_FW_ERR_MASK		GENMASK_ULL(31, 0)
> +#define SNP_GUEST_VMM_ERR_SHIFT		32
> +
> +#define SNP_GUEST_VMM_ERR_INVALID_LEN	1
> +#define SNP_GUEST_VMM_ERR_BUSY		2
> +
>   #endif /* __UAPI_LINUX_SEV_GUEST_H_ */
  
Dionna Amalie Glaze Oct. 28, 2022, 3:57 p.m. UTC | #2
> >   static u32 vmpck_id;
> >   module_param(vmpck_id, uint, 0444);
> >   MODULE_PARM_DESC(vmpck_id, "The VMPCK ID to use when communicating with the PSP.");
> >
> > +static int rate_hz = 2;
>
> s/int/unsigned int/ to match uint below.
>

I'll change the uint to int, since the types in ratelimit_struct are int.

> > +module_param(rate_burst, uint, 0444);
> > +MODULE_PARM_DESC(rate_burst, "The rate limit burst amount to limit requests to.");
> > +
> >   /* Mutex to serialize the shared buffer access and command handling. */
> >   static DEFINE_MUTEX(snp_cmd_mutex);
> >
> > @@ -305,9 +316,12 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
> >                               u8 type, void *req_buf, size_t req_sz, void *resp_buf,
> >                               u32 resp_sz, __u64 *exitinfo2)
> >   {
> > +     unsigned int vmm_err;
> >       u64 seqno;
> >       int rc;
> >
> > +     might_resched();
> > +
>
> Not sure this is needed. This may call cond_resched() right away, so I
> don't think this is doing what you think it is (as I only see it used in
> scheduler code and spinlock code). Did you mean to use might_sleep()?

Rereading the docs about it, I'll remove it.

>
> >       /* Get message sequence and verify that its a non-zero */
> >       seqno = snp_get_msg_seqno(snp_dev);
> >       if (!seqno)
> > @@ -320,9 +334,35 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
> >       if (rc)
> >               return rc;
> >
> > +retry:
> > +     /*
> > +      * Rate limit commands internally since the host can also throttle, and
> > +      * we don't want to create a tight request spin that could end up
> > +      * getting this VM throttled more heavily.
> > +      */
> > +     if (!__ratelimit(&snp_dev->rs)) {
>
> This doesn't do any sleeping/delaying, it just returns a 0 or 1, so you
> could still cause a tight spin here. I guess that shouldn't be a problem
> if nothing else is ready to run. But maybe adding an msleep()/usleep()
> here based on the rate limit parameters (half the rate limit?) would be
> good so that the CPU isn't pegged while waiting for the ratelimit to be
> satisfied?
>

It looks like schedule_timeout_interruptible((rate_hz * HZ) / 2) could be
the appropriate solution for this.

> I'll let others chime in on that and see if it is appropriate.
>

Same, I'll wait until later Monday to send out v5.

> Thanks,
> Tom

Thanks for the reviews :)
  

Patch

diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index 0508c2f46f6b..dfde777933b6 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -14,6 +14,7 @@ 
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/miscdevice.h>
+#include <linux/ratelimit.h>
 #include <linux/set_memory.h>
 #include <linux/fs.h>
 #include <crypto/aead.h>
@@ -48,12 +49,22 @@  struct snp_guest_dev {
 	struct snp_req_data input;
 	u32 *os_area_msg_seqno;
 	u8 *vmpck;
+
+	struct ratelimit_state rs;
 };
 
 static u32 vmpck_id;
 module_param(vmpck_id, uint, 0444);
 MODULE_PARM_DESC(vmpck_id, "The VMPCK ID to use when communicating with the PSP.");
 
+static int rate_hz = 2;
+module_param(rate_hz, uint, 0444);
+MODULE_PARM_DESC(vmpck_id, "The rate limit frequency to limit requests to.");
+
+static int rate_burst = 1;
+module_param(rate_burst, uint, 0444);
+MODULE_PARM_DESC(rate_burst, "The rate limit burst amount to limit requests to.");
+
 /* Mutex to serialize the shared buffer access and command handling. */
 static DEFINE_MUTEX(snp_cmd_mutex);
 
@@ -305,9 +316,12 @@  static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 				u8 type, void *req_buf, size_t req_sz, void *resp_buf,
 				u32 resp_sz, __u64 *exitinfo2)
 {
+	unsigned int vmm_err;
 	u64 seqno;
 	int rc;
 
+	might_resched();
+
 	/* Get message sequence and verify that its a non-zero */
 	seqno = snp_get_msg_seqno(snp_dev);
 	if (!seqno)
@@ -320,9 +334,35 @@  static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 	if (rc)
 		return rc;
 
+retry:
+	/*
+	 * Rate limit commands internally since the host can also throttle, and
+	 * we don't want to create a tight request spin that could end up
+	 * getting this VM throttled more heavily.
+	 */
+	if (!__ratelimit(&snp_dev->rs)) {
+		cond_resched();
+		goto retry;
+	}
 	/* Call firmware to process the request */
 	rc = snp_issue_guest_request(exit_code, &snp_dev->input, exitinfo2);
 
+	vmm_err = *exitinfo2 >> SNP_GUEST_VMM_ERR_SHIFT;
+	/*
+	 * The host may return EBUSY if the request has been throttled.
+	 * We retry in the driver to avoid returning and reusing the message
+	 * sequence number on a different message.
+	 */
+	if (vmm_err == SNP_GUEST_VMM_ERR_BUSY) {
+		cond_resched();
+		goto retry;
+	}
+
+	if (vmm_err && vmm_err != SNP_GUEST_VMM_ERR_INVALID_LEN) {
+		pr_err("sev-guest: host returned unknown error code: %d\n",
+		       vmm_err);
+		return -EINVAL;
+	}
 	if (rc)
 		return rc;
 
@@ -375,7 +415,7 @@  static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io
 
 	rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version,
 				  SNP_MSG_REPORT_REQ, &req, sizeof(req), resp->data,
-				  resp_len, &arg->fw_err);
+				  resp_len, &arg->exitinfo2);
 	if (rc)
 		goto e_free;
 
@@ -415,7 +455,7 @@  static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque
 
 	rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version,
 				  SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len,
-				  &arg->fw_err);
+				  &arg->exitinfo2);
 	if (rc)
 		return rc;
 
@@ -477,10 +517,10 @@  static int get_ext_report(struct snp_guest_dev *snp_dev, struct snp_guest_reques
 	snp_dev->input.data_npages = npages;
 	ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version,
 				   SNP_MSG_REPORT_REQ, &req.data,
-				   sizeof(req.data), resp->data, resp_len, &arg->fw_err);
+				   sizeof(req.data), resp->data, resp_len, &arg->exitinfo2);
 
 	/* If certs length is invalid then copy the returned length */
-	if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) {
+	if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) {
 		req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT;
 
 		if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req)))
@@ -515,7 +555,7 @@  static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long
 	if (copy_from_user(&input, argp, sizeof(input)))
 		return -EFAULT;
 
-	input.fw_err = 0xff;
+	input.exitinfo2 = SEV_RET_NO_FW_CALL;
 
 	/* Message version must be non-zero */
 	if (!input.msg_version)
@@ -546,7 +586,7 @@  static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long
 
 	mutex_unlock(&snp_cmd_mutex);
 
-	if (input.fw_err && copy_to_user(argp, &input, sizeof(input)))
+	if (input.exitinfo2 && copy_to_user(argp, &input, sizeof(input)))
 		return -EFAULT;
 
 	return ret;
@@ -696,6 +736,8 @@  static int __init sev_guest_probe(struct platform_device *pdev)
 	if (ret)
 		goto e_free_cert_data;
 
+	ratelimit_state_init(&snp_dev->rs, rate_hz * HZ, rate_burst);
+
 	dev_info(dev, "Initialized SEV guest driver (using vmpck_id %d)\n", vmpck_id);
 	return 0;
 
diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h
index 256aaeff7e65..8e4144aa78c9 100644
--- a/include/uapi/linux/sev-guest.h
+++ b/include/uapi/linux/sev-guest.h
@@ -52,8 +52,15 @@  struct snp_guest_request_ioctl {
 	__u64 req_data;
 	__u64 resp_data;
 
-	/* firmware error code on failure (see psp-sev.h) */
-	__u64 fw_err;
+	/* bits[63:32]: VMM error code, bits[31:0] firmware error code (see psp-sev.h) */
+	union {
+		__u64 exitinfo2;
+		__u64 fw_err; /* Name deprecated in favor of others */
+		struct {
+			__u32 fw_error;
+			__u32 vmm_error;
+		};
+	};
 };
 
 struct snp_ext_report_req {
@@ -77,4 +84,11 @@  struct snp_ext_report_req {
 /* Get SNP extended report as defined in the GHCB specification version 2. */
 #define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl)
 
+/* Guest message request EXIT_INFO_2 constants */
+#define SNP_GUEST_FW_ERR_MASK		GENMASK_ULL(31, 0)
+#define SNP_GUEST_VMM_ERR_SHIFT		32
+
+#define SNP_GUEST_VMM_ERR_INVALID_LEN	1
+#define SNP_GUEST_VMM_ERR_BUSY		2
+
 #endif /* __UAPI_LINUX_SEV_GUEST_H_ */