x86/sev: Fix SNP CPUID requests to the hypervisor
Commit Message
SNP retrieves the majority of CPUID information from the SNP CPUID page.
But there are times when that information needs to be supplemented by the
hypervisor, for example, obtaining the initial APIC ID of the vCPU from
leaf 1.
The current implementation uses the MSR protocol to retrieve the data from
the hypervisor, even when a GHCB exists. The problem arises when an NMI
arrives on return from the VMGEXIT. The NMI will be immediately serviced
and may generate a #VC requiring communication with the hypervisor. Since
a GHCB exists in this case, it will be used. As part of using the GHCB,
the #VC handler will write the GHCB physical address into the GHCB MSR and
the #VC will be handled. When the NMI completes, processing resumes at the
site of the VMGEXIT, which is expecting to read the GHCB MSR and find a
CPUID MSR protocol response. Since the NMI handling overwrote the GHCB MSR
response, the guest will see an invalid reply from the hypervisor and
self-terminate.
Fix this problem by using the GHCB when it is available. Any NMI received
is properly handled because the GHCB contents are copied into a backup
page and restored on NMI exit, thus preserving the active GHCB request or
result.
Fixes: ee0bfa08a345 ("x86/compressed/64: Add support for SEV-SNP CPUID table in #VC handlers")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
---
arch/x86/kernel/sev-shared.c | 69 ++++++++++++++++++++++++++++--------
1 file changed, 55 insertions(+), 14 deletions(-)
Comments
> Fixes: ee0bfa08a345 ("x86/compressed/64: Add support for SEV-SNP CPUID table in #VC handlers")
> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Nice catch.
> +static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
> +{
> + u32 cr4 = native_read_cr4();
> + int ret;
> +
> + ghcb_set_rax(ghcb, leaf->fn);
> + ghcb_set_rcx(ghcb, leaf->subfn);
> +
> + if (cr4 & X86_CR4_OSXSAVE)
> + /* Safe to read xcr0 */
> + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
> + else
> + /* xgetbv will cause #GP - use reset value for xcr0 */
> + ghcb_set_xcr0(ghcb, 1);
> +
Everything looks good except I'm confused by this last comment. I
thought xgetbv would #UD if OSXSAVE isn't set in cr4. Is that what
happens after you set it to 1 as some kind of workaround?
....oh I think I do remember hitting this weird technicality when
setting up CPUID support. I think it'd be helpful to document a little
bit more why the ghcb's value is xcr0 is relevant at all when cr4's
OSXSAVE bit is 0 though.
On 7/31/23 22:58, Dionna Amalie Glaze wrote:
>> Fixes: ee0bfa08a345 ("x86/compressed/64: Add support for SEV-SNP CPUID table in #VC handlers")
>> Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
>
> Nice catch.
>
>> +static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
>> +{
>> + u32 cr4 = native_read_cr4();
>> + int ret;
>> +
>> + ghcb_set_rax(ghcb, leaf->fn);
>> + ghcb_set_rcx(ghcb, leaf->subfn);
>> +
>> + if (cr4 & X86_CR4_OSXSAVE)
>> + /* Safe to read xcr0 */
>> + ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
>> + else
>> + /* xgetbv will cause #GP - use reset value for xcr0 */
>> + ghcb_set_xcr0(ghcb, 1);
>> +
>
> Everything looks good except I'm confused by this last comment. I
> thought xgetbv would #UD if OSXSAVE isn't set in cr4. Is that what
> happens after you set it to 1 as some kind of workaround?
Yes, after checking the APM, it would #UD. This code was copied from the
existing CPUID #VC support. Not sure it requires a new version or if the
maintainers could fix it up when applying.
Thanks,
Tom
>
> ....oh I think I do remember hitting this weird technicality when
> setting up CPUID support. I think it'd be helpful to document a little
> bit more why the ghcb's value is xcr0 is relevant at all when cr4's
> OSXSAVE bit is 0 though.
>
>
@@ -256,7 +256,7 @@ static int __sev_cpuid_hv(u32 fn, int reg_idx, u32 *reg)
return 0;
}
-static int sev_cpuid_hv(struct cpuid_leaf *leaf)
+static int __sev_cpuid_hv_msr(struct cpuid_leaf *leaf)
{
int ret;
@@ -279,6 +279,45 @@ static int sev_cpuid_hv(struct cpuid_leaf *leaf)
return ret;
}
+static int __sev_cpuid_hv_ghcb(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+{
+ u32 cr4 = native_read_cr4();
+ int ret;
+
+ ghcb_set_rax(ghcb, leaf->fn);
+ ghcb_set_rcx(ghcb, leaf->subfn);
+
+ if (cr4 & X86_CR4_OSXSAVE)
+ /* Safe to read xcr0 */
+ ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+ else
+ /* xgetbv will cause #GP - use reset value for xcr0 */
+ ghcb_set_xcr0(ghcb, 1);
+
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+ if (ret != ES_OK)
+ return ret;
+
+ if (!(ghcb_rax_is_valid(ghcb) &&
+ ghcb_rbx_is_valid(ghcb) &&
+ ghcb_rcx_is_valid(ghcb) &&
+ ghcb_rdx_is_valid(ghcb)))
+ return ES_VMM_ERROR;
+
+ leaf->eax = ghcb->save.rax;
+ leaf->ebx = ghcb->save.rbx;
+ leaf->ecx = ghcb->save.rcx;
+ leaf->edx = ghcb->save.rdx;
+
+ return ES_OK;
+}
+
+static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
+{
+ return ghcb ? __sev_cpuid_hv_ghcb(ghcb, ctxt, leaf)
+ : __sev_cpuid_hv_msr(leaf);
+}
+
/*
* This may be called early while still running on the initial identity
* mapping. Use RIP-relative addressing to obtain the correct address
@@ -388,19 +427,20 @@ snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
return false;
}
-static void snp_cpuid_hv(struct cpuid_leaf *leaf)
+static void snp_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
- if (sev_cpuid_hv(leaf))
+ if (sev_cpuid_hv(ghcb, ctxt, leaf))
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_CPUID_HV);
}
-static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
+static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ struct cpuid_leaf *leaf)
{
struct cpuid_leaf leaf_hv = *leaf;
switch (leaf->fn) {
case 0x1:
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
/* initial APIC ID */
leaf->ebx = (leaf_hv.ebx & GENMASK(31, 24)) | (leaf->ebx & GENMASK(23, 0));
@@ -419,7 +459,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
break;
case 0xB:
leaf_hv.subfn = 0;
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
/* extended APIC ID */
leaf->edx = leaf_hv.edx;
@@ -467,7 +507,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
}
break;
case 0x8000001E:
- snp_cpuid_hv(&leaf_hv);
+ snp_cpuid_hv(ghcb, ctxt, &leaf_hv);
/* extended APIC ID */
leaf->eax = leaf_hv.eax;
@@ -488,7 +528,7 @@ static int snp_cpuid_postprocess(struct cpuid_leaf *leaf)
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
-static int snp_cpuid(struct cpuid_leaf *leaf)
+static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
@@ -522,7 +562,7 @@ static int snp_cpuid(struct cpuid_leaf *leaf)
return 0;
}
- return snp_cpuid_postprocess(leaf);
+ return snp_cpuid_postprocess(ghcb, ctxt, leaf);
}
/*
@@ -544,14 +584,14 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
leaf.fn = fn;
leaf.subfn = subfn;
- ret = snp_cpuid(&leaf);
+ ret = snp_cpuid(NULL, NULL, &leaf);
if (!ret)
goto cpuid_done;
if (ret != -EOPNOTSUPP)
goto fail;
- if (sev_cpuid_hv(&leaf))
+ if (__sev_cpuid_hv_msr(&leaf))
goto fail;
cpuid_done:
@@ -848,14 +888,15 @@ static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
return ret;
}
-static int vc_handle_cpuid_snp(struct pt_regs *regs)
+static int vc_handle_cpuid_snp(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
+ struct pt_regs *regs = ctxt->regs;
struct cpuid_leaf leaf;
int ret;
leaf.fn = regs->ax;
leaf.subfn = regs->cx;
- ret = snp_cpuid(&leaf);
+ ret = snp_cpuid(ghcb, ctxt, &leaf);
if (!ret) {
regs->ax = leaf.eax;
regs->bx = leaf.ebx;
@@ -874,7 +915,7 @@ static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
enum es_result ret;
int snp_cpuid_ret;
- snp_cpuid_ret = vc_handle_cpuid_snp(regs);
+ snp_cpuid_ret = vc_handle_cpuid_snp(ghcb, ctxt);
if (!snp_cpuid_ret)
return ES_OK;
if (snp_cpuid_ret != -EOPNOTSUPP)