[v10,098/108] KVM: TDX: Implement callbacks for MSR operations for TDX

Message ID 1cacbda18e3c7dcccd92a7390b0ca7f4ba073f85.1667110240.git.isaku.yamahata@intel.com
State: New
Series: KVM TDX basic feature support

Commit Message

Isaku Yamahata Oct. 30, 2022, 6:23 a.m. UTC
  From: Isaku Yamahata <isaku.yamahata@intel.com>

Implement the set_msr/get_msr/has_emulated_msr methods for TDX to handle
hypercalls from the guest TD for paravirtualized rdmsr and wrmsr.  The TDX
module virtualizes MSRs.  For some MSRs, it injects #VE to the guest TD
upon RDMSR or WRMSR.  The exact list of such MSRs is defined in the spec.

Upon #VE, the guest TD may execute the hypercalls
TDG.VP.VMCALL<INSTRUCTION.RDMSR> and TDG.VP.VMCALL<INSTRUCTION.WRMSR>,
which are defined in the GHCI (Guest-Host Communication Interface), so that
the host VMM (e.g. KVM) can virtualize the MSRs.
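
[Editor's note: to make the #VE flow concrete, the host side of
TDG.VP.VMCALL<INSTRUCTION.RDMSR> ends up looking roughly like the sketch
below.  The tdvmcall_*() accessors and the TDG_VP_VMCALL_* status codes
are assumed names from elsewhere in this series, not definitions from
this patch; kvm_get_msr() is the existing common helper.]

	/*
	 * Hedged sketch: service the paravirtualized RDMSR hypercall.
	 * kvm_get_msr() dispatches through the .get_msr op installed by
	 * this patch (vt_get_msr() -> tdx_get_msr()), which rejects MSRs
	 * that tdx_is_emulated_msr() doesn't allow.
	 */
	static int tdx_emulate_rdmsr(struct kvm_vcpu *vcpu)
	{
		u32 index = tdvmcall_a0_read(vcpu);
		u64 data;

		if (kvm_get_msr(vcpu, index, &data)) {
			tdvmcall_set_return_code(vcpu, TDG_VP_VMCALL_INVALID_OPERAND);
			return 1;
		}

		tdvmcall_set_return_code(vcpu, TDG_VP_VMCALL_SUCCESS);
		tdvmcall_set_return_val(vcpu, data);
		return 1;
	}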

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/main.c    | 34 ++++++++++++++++++--
 arch/x86/kvm/vmx/tdx.c     | 63 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/x86_ops.h |  6 ++++
 arch/x86/kvm/x86.c         |  1 -
 arch/x86/kvm/x86.h         |  2 ++
 5 files changed, 102 insertions(+), 4 deletions(-)
  

Comments

Binbin Wu Nov. 23, 2022, 2:25 p.m. UTC | #1
On 10/30/2022 2:23 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> Implements set_msr/get_msr/has_emulated_msr methods for TDX to handle
> hypercall from guest TD for paravirtualized rdmsr and wrmsr.  The TDX
> module virtualizes MSRs.  For some MSRs, it injects #VE to the guest TD
> upon RDMSR or WRMSR.  The exact list of such MSRs are defined in the spec.

How does KVM handle the case that the userland wants to get the TD MSR list?

KVM_GET_MSR_INDEX_LIST will return the global KVM MSR list, which is 
different from the TD version.


>
> Upon #VE, the guest TD may execute hypercalls,
> TDG.VP.VMCALL<INSTRUCTION.RDMSR> and TDG.VP.VMCALL<INSTRUCTION.WRMSR>,
> which are defined in GHCI (Guest-Host Communication Interface) so that the
> host VMM (e.g. KVM) can virtualizes
can virtualize



> the MSRs.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   arch/x86/kvm/vmx/main.c    | 34 ++++++++++++++++++--
>   arch/x86/kvm/vmx/tdx.c     | 63 ++++++++++++++++++++++++++++++++++++++
>   arch/x86/kvm/vmx/x86_ops.h |  6 ++++
>   arch/x86/kvm/x86.c         |  1 -
>   arch/x86/kvm/x86.h         |  2 ++
>   5 files changed, 102 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index 74c561e3eb46..3a8a4fdf1ce7 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -195,6 +195,34 @@ static void vt_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>   	vmx_handle_exit_irqoff(vcpu);
>   }
>   
> +static int vt_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> +{
> +	if (unlikely(is_td_vcpu(vcpu)))
> +		return tdx_set_msr(vcpu, msr_info);
> +
> +	return vmx_set_msr(vcpu, msr_info);
> +}
> +
> +/*
> + * The kvm parameter can be NULL (module initialization, or invocation before
> + * VM creation). Be sure to check the kvm parameter before using it.
> + */
> +static bool vt_has_emulated_msr(struct kvm *kvm, u32 index)
> +{
> +	if (kvm && is_td(kvm))
> +		return tdx_is_emulated_msr(index, true);
> +
> +	return vmx_has_emulated_msr(kvm, index);
> +}
> +
> +static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> +{
> +	if (unlikely(is_td_vcpu(vcpu)))
> +		return tdx_get_msr(vcpu, msr_info);
> +
> +	return vmx_get_msr(vcpu, msr_info);
> +}
> +
>   static void vt_apicv_post_state_restore(struct kvm_vcpu *vcpu)
>   {
>   	struct pi_desc *pi = vcpu_to_pi_desc(vcpu);
> @@ -431,7 +459,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>   
>   	.hardware_enable = vt_hardware_enable,
>   	.hardware_disable = vt_hardware_disable,
> -	.has_emulated_msr = vmx_has_emulated_msr,
> +	.has_emulated_msr = vt_has_emulated_msr,
>   
>   	.is_vm_type_supported = vt_is_vm_type_supported,
>   	.vm_size = sizeof(struct kvm_vmx),
> @@ -451,8 +479,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>   
>   	.update_exception_bitmap = vmx_update_exception_bitmap,
>   	.get_msr_feature = vmx_get_msr_feature,
> -	.get_msr = vmx_get_msr,
> -	.set_msr = vmx_set_msr,
> +	.get_msr = vt_get_msr,
> +	.set_msr = vt_set_msr,
>   	.get_segment_base = vmx_get_segment_base,
>   	.get_segment = vmx_get_segment,
>   	.set_segment = vmx_set_segment,
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 50e9352464a9..b820475ce0ab 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -1489,6 +1489,69 @@ void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
>   	*error_code = 0;
>   }
>   
> +bool tdx_is_emulated_msr(u32 index, bool write)
> +{
> +	switch (index) {
> +	case MSR_IA32_UCODE_REV:
> +	case MSR_IA32_ARCH_CAPABILITIES:
> +	case MSR_IA32_POWER_CTL:
> +	case MSR_MTRRcap:
> +	case 0x200 ... 0x26f:
> +		/* IA32_MTRR_PHYS{BASE, MASK}, IA32_MTRR_FIX*_* */
> +	case MSR_IA32_CR_PAT:
> +	case MSR_MTRRdefType:
> +	case MSR_IA32_TSC_DEADLINE:
> +	case MSR_IA32_MISC_ENABLE:
> +	case MSR_KVM_STEAL_TIME:
> +	case MSR_KVM_POLL_CONTROL:
> +	case MSR_PLATFORM_INFO:
> +	case MSR_MISC_FEATURES_ENABLES:
> +	case MSR_IA32_MCG_CAP:
> +	case MSR_IA32_MCG_STATUS:
> +	case MSR_IA32_MCG_CTL:
> +	case MSR_IA32_MCG_EXT_CTL:
> +	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
> +	case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
> +		/* MSR_IA32_MCx_{CTL, STATUS, ADDR, MISC, CTL2} */
> +		return true;
> +	case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
> +		/*
> +		 * x2APIC registers that are virtualized by the CPU can't be
> +		 * emulated, KVM doesn't have access to the virtual APIC page.
> +		 */
> +		switch (index) {
> +		case X2APIC_MSR(APIC_TASKPRI):
> +		case X2APIC_MSR(APIC_PROCPRI):
> +		case X2APIC_MSR(APIC_EOI):
> +		case X2APIC_MSR(APIC_ISR) ... X2APIC_MSR(APIC_ISR + APIC_ISR_NR):
> +		case X2APIC_MSR(APIC_TMR) ... X2APIC_MSR(APIC_TMR + APIC_ISR_NR):
> +		case X2APIC_MSR(APIC_IRR) ... X2APIC_MSR(APIC_IRR + APIC_ISR_NR):
> +			return false;
> +		default:
> +			return true;
> +		}
> +	case MSR_IA32_APICBASE:
> +	case MSR_EFER:
> +		return !write;
> +	default:
> +		return false;
> +	}
> +}
> +
> +int tdx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	if (tdx_is_emulated_msr(msr->index, false))
> +		return kvm_get_msr_common(vcpu, msr);
> +	return 1;
> +}
> +
> +int tdx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> +{
> +	if (tdx_is_emulated_msr(msr->index, true))
> +		return kvm_set_msr_common(vcpu, msr);
> +	return 1;
> +}
> +
>   int tdx_dev_ioctl(void __user *argp)
>   {
>   	struct kvm_tdx_capabilities __user *user_caps;
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index d02619f64b6e..8b9399b154f3 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -162,6 +162,9 @@ void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
>   void tdx_inject_nmi(struct kvm_vcpu *vcpu);
>   void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
>   		u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
> +bool tdx_is_emulated_msr(u32 index, bool write);
> +int tdx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
> +int tdx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
>   
>   int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
>   int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
> @@ -200,6 +203,9 @@ static inline void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mo
>   static inline void tdx_inject_nmi(struct kvm_vcpu *vcpu) {}
>   static inline void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1,
>   				     u64 *info2, u32 *intr_info, u32 *error_code) {}
> +static inline bool tdx_is_emulated_msr(u32 index, bool write) { return false; }
> +static inline int tdx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) { return 1; }
> +static inline int tdx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) { return 1; }
>   
>   static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
>   static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2eacc4929d5d..5304b27f2566 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -85,7 +85,6 @@
>   #include "trace.h"
>   
>   #define MAX_IO_MSRS 256
> -#define KVM_MAX_MCE_BANKS 32
>   
>   struct kvm_caps kvm_caps __read_mostly = {
>   	.supported_mce_cap = MCG_CTL_P | MCG_SER_P,
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 829d3134c1eb..1d77c39821ae 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -8,6 +8,8 @@
>   #include "kvm_cache_regs.h"
>   #include "kvm_emulate.h"
>   
> +#define KVM_MAX_MCE_BANKS 32
> +
>   struct kvm_caps {
>   	/* control of guest tsc rate supported? */
>   	bool has_tsc_control;
  
Kai Huang Dec. 14, 2022, 11:22 a.m. UTC | #2
On Sat, 2022-10-29 at 23:23 -0700, isaku.yamahata@intel.com wrote:
> +bool tdx_is_emulated_msr(u32 index, bool write)
> +{
> +	switch (index) {
> +	case MSR_IA32_UCODE_REV:
> +	case MSR_IA32_ARCH_CAPABILITIES:
> +	case MSR_IA32_POWER_CTL:
> +	case MSR_MTRRcap:
> +	case 0x200 ... 0x26f:
> +		/* IA32_MTRR_PHYS{BASE, MASK}, IA32_MTRR_FIX*_* */
> +	case MSR_IA32_CR_PAT:
> +	case MSR_MTRRdefType:
> +	case MSR_IA32_TSC_DEADLINE:
> +	case MSR_IA32_MISC_ENABLE:
> +	case MSR_KVM_STEAL_TIME:
> +	case MSR_KVM_POLL_CONTROL:

To me, putting KVM para-virt MSRs and hardware MSRs together isn't a good idea. 
You can introduce separate helpers for them.

> +	case MSR_PLATFORM_INFO:
> +	case MSR_MISC_FEATURES_ENABLES:
> +	case MSR_IA32_MCG_CAP:
> +	case MSR_IA32_MCG_STATUS:
> +	case MSR_IA32_MCG_CTL:
> +	case MSR_IA32_MCG_EXT_CTL:
> +	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
> +	case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
> +		/* MSR_IA32_MCx_{CTL, STATUS, ADDR, MISC, CTL2} */
> +		return true;
> +	case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
> +		/*
> +		 * x2APIC registers that are virtualized by the CPU can't be
> +		 * emulated, KVM doesn't have access to the virtual APIC page.
> +		 */
> +		switch (index) {
> +		case X2APIC_MSR(APIC_TASKPRI):
> +		case X2APIC_MSR(APIC_PROCPRI):
> +		case X2APIC_MSR(APIC_EOI):
> +		case X2APIC_MSR(APIC_ISR) ... X2APIC_MSR(APIC_ISR + APIC_ISR_NR):
> +		case X2APIC_MSR(APIC_TMR) ... X2APIC_MSR(APIC_TMR + APIC_ISR_NR):
> +		case X2APIC_MSR(APIC_IRR) ... X2APIC_MSR(APIC_IRR + APIC_ISR_NR):
> +			return false;
> +		default:
> +			return true;
> +		}
> +	case MSR_IA32_APICBASE:
> +	case MSR_EFER:
> +		return !write;
> +	default:
> +		return false;
> +	}
> +}
  
Isaku Yamahata Dec. 16, 2022, 1:29 a.m. UTC | #3
On Wed, Nov 23, 2022 at 10:25:11PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> 
> On 10/30/2022 2:23 PM, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> > Implements set_msr/get_msr/has_emulated_msr methods for TDX to handle
> > hypercall from guest TD for paravirtualized rdmsr and wrmsr.  The TDX
> > module virtualizes MSRs.  For some MSRs, it injects #VE to the guest TD
> > upon RDMSR or WRMSR.  The exact list of such MSRs are defined in the spec.
> 
> How does KVM handle the case that the userland wants to get the TD MSR list?
> 
> KVM_GET_MSR_INDEX_LIST will return the global KVM MSR list, which is
> different from the TD version.

Do you mean the user space VMM (e.g. qemu) by "the userland"?
There are three classes of MSR virtualization:
- non-configurable: the TDX module directly virtualizes the MSR.  The VMM
  can't configure it; the value set by KVM_SET_MSR_INDEX_LIST is ignored.
- configurable: the TDX module directly virtualizes the MSR.  The VMM can
  configure it at VM creation time; the value set by KVM_SET_MSR_INDEX_LIST
  is used.
- #VE: the VMM handles the MSR hypercall that the guest TD issues.  The
  value set by KVM_SET_MSR_INDEX_LIST is used.
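
[Editor's note: for reference, the global list Binbin mentions is what
userspace obtains via the standard KVM_GET_MSR_INDEX_LIST ioctl on
/dev/kvm; it is system-wide, not per-VM or per-TD, which is exactly the
gap being discussed.  A minimal sketch, error handling elided:]

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	int main(void)
	{
		int kvm = open("/dev/kvm", O_RDWR);
		struct kvm_msr_list probe = { .nmsrs = 0 };
		struct kvm_msr_list *list;

		/* First call fails with E2BIG but fills in the required nmsrs. */
		ioctl(kvm, KVM_GET_MSR_INDEX_LIST, &probe);
		list = malloc(sizeof(*list) + probe.nmsrs * sizeof(__u32));
		list->nmsrs = probe.nmsrs;
		ioctl(kvm, KVM_GET_MSR_INDEX_LIST, list);

		for (__u32 i = 0; i < list->nmsrs; i++)
			printf("0x%x\n", list->indices[i]);
		return 0;
	}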


> > Upon #VE, the guest TD may execute hypercalls,
> > TDG.VP.VMCALL<INSTRUCTION.RDMSR> and TDG.VP.VMCALL<INSTRUCTION.WRMSR>,
> > which are defined in GHCI (Guest-Host Communication Interface) so that the
> > host VMM (e.g. KVM) can virtualizes
> can virtualize

Yes, will fix.
  
Isaku Yamahata Dec. 16, 2022, 1:39 a.m. UTC | #4
On Wed, Dec 14, 2022 at 11:22:02AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> On Sat, 2022-10-29 at 23:23 -0700, isaku.yamahata@intel.com wrote:
> > +bool tdx_is_emulated_msr(u32 index, bool write)
> > +{
> > +	switch (index) {
> > +	case MSR_IA32_UCODE_REV:
> > +	case MSR_IA32_ARCH_CAPABILITIES:
> > +	case MSR_IA32_POWER_CTL:
> > +	case MSR_MTRRcap:
> > +	case 0x200 ... 0x26f:
> > +		/* IA32_MTRR_PHYS{BASE, MASK}, IA32_MTRR_FIX*_* */
> > +	case MSR_IA32_CR_PAT:
> > +	case MSR_MTRRdefType:
> > +	case MSR_IA32_TSC_DEADLINE:
> > +	case MSR_IA32_MISC_ENABLE:
> > +	case MSR_KVM_STEAL_TIME:
> > +	case MSR_KVM_POLL_CONTROL:
> 
> To me putting KVM para-virt MSRs and hardware MSRs together isn't good idea. 
> You can introduce separate helpers for them.

Makes sense. Updated as follows.

diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index ee8d395af849..b4132167e2bb 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1606,6 +1606,17 @@ void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
        *error_code = 0;
 }
 
+static bool tdx_is_emulated_kvm_msr(u32 index, bool write)
+{
+       switch (index) {
+       case MSR_KVM_STEAL_TIME:
+       case MSR_KVM_POLL_CONTROL:
+               return true;
+       default:
+               return false;
+       }
+}
+
 bool tdx_is_emulated_msr(u32 index, bool write)
 {
        switch (index) {
@@ -1619,8 +1630,6 @@ bool tdx_is_emulated_msr(u32 index, bool write)
        case MSR_MTRRdefType:
        case MSR_IA32_TSC_DEADLINE:
        case MSR_IA32_MISC_ENABLE:
-       case MSR_KVM_STEAL_TIME:
-       case MSR_KVM_POLL_CONTROL:
        case MSR_PLATFORM_INFO:
        case MSR_MISC_FEATURES_ENABLES:
        case MSR_IA32_MCG_CAP:
@@ -1650,6 +1659,9 @@ bool tdx_is_emulated_msr(u32 index, bool write)
        case MSR_IA32_APICBASE:
        case MSR_EFER:
                return !write;
+       case 0x4b564d00 ... 0x4b564dff:
+               /* KVM custom MSRs */
+               return tdx_is_emulated_kvm_msr(index, write);
        default:
                return false;
        }
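
[Editor's note: for readers wondering about the magic range, 0x4b564d is
ASCII "KVM" ('K' = 0x4b, 'V' = 0x56, 'M' = 0x4d), i.e. the KVM paravirt
MSR space.  KVM_PV_MSR_BASE below is an illustrative name, not one the
kernel defines; the kernel spells out the individual MSRs instead:]

	/* 'K' 'V' 'M' in ASCII, shifted to form the paravirt MSR range. */
	#define KVM_PV_MSR_BASE	0x4b564d00
	/* e.g. MSR_KVM_STEAL_TIME is 0x4b564d03, MSR_KVM_POLL_CONTROL 0x4b564d05. */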
  
Ackerley Tng Jan. 4, 2023, 9:20 p.m. UTC | #5
I believe we should also have a handler for .msr_filter_changed.

Without an .msr_filter_changed handler, a host crash can occur if we
first set up a vcpu for the TD and then set an MSR filter: upon
vcpu_enter_guest, the .msr_filter_changed handler (currently
vmx_msr_filter_changed()) will be invoked, and to_vmx(vcpu) will
interpret the containing struct of the struct kvm_vcpu as a struct
vcpu_vmx instead of a struct vcpu_tdx.

In my case, I was working on a selftest and the missing handler caused a
NULL dereference in vmx_disable_intercept_for_msr() because
vmx->vmcs01.msr_bitmap is NULL.
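
[Editor's note: one possible shape of the fix, as a sketch (assuming
is_td_vcpu() from this series).  TDX gives KVM no MSR bitmap to update,
so the TD case can simply ignore the notification:]

	static void vt_msr_filter_changed(struct kvm_vcpu *vcpu)
	{
		/*
		 * TDX doesn't allow the VMM to intercept guest MSR accesses
		 * directly, so there is no MSR bitmap to update; bail out
		 * instead of letting vmx_msr_filter_changed() cast the vcpu
		 * to struct vcpu_vmx.
		 */
		if (is_td_vcpu(vcpu))
			return;

		vmx_msr_filter_changed(vcpu);
	}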
  
Isaku Yamahata Jan. 12, 2023, 10:06 a.m. UTC | #6
On Wed, Jan 04, 2023 at 01:20:13PM -0800,
Ackerley Tng <ackerleytng@google.com> wrote:

> 
> I believe we should also have a handler for .msr_filter_changed.
> 
> Without an .msr_filter_changed handler, a host crash can occur if we
> first set up a vcpu for the TD, and then set an MSR filter.
> 
> If we first set up a vcpu for the TD, and then set an MSR filter, upon
> vcpu_enter_guest, the .msr_filter_changed handler (currently
> vmx_msr_filter_changed()) will be invoked. to_vmx(vcpu) interprets the
> containing struct of struct kvm_vcpu to be a struct vcpu_vmx instead of
> a struct vcpu_tdx.
> 
> In my case, I was working on a selftest and the missing handler caused a
> NULL dereference in vmx_disable_intercept_for_msr() because
> vmx->vmcs01.msr_bitmap is NULL.

Nice catch. I'll fix it.
  

Patch

diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 74c561e3eb46..3a8a4fdf1ce7 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -195,6 +195,34 @@  static void vt_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 	vmx_handle_exit_irqoff(vcpu);
 }
 
+static int vt_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	if (unlikely(is_td_vcpu(vcpu)))
+		return tdx_set_msr(vcpu, msr_info);
+
+	return vmx_set_msr(vcpu, msr_info);
+}
+
+/*
+ * The kvm parameter can be NULL (module initialization, or invocation before
+ * VM creation). Be sure to check the kvm parameter before using it.
+ */
+static bool vt_has_emulated_msr(struct kvm *kvm, u32 index)
+{
+	if (kvm && is_td(kvm))
+		return tdx_is_emulated_msr(index, true);
+
+	return vmx_has_emulated_msr(kvm, index);
+}
+
+static int vt_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	if (unlikely(is_td_vcpu(vcpu)))
+		return tdx_get_msr(vcpu, msr_info);
+
+	return vmx_get_msr(vcpu, msr_info);
+}
+
 static void vt_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 {
 	struct pi_desc *pi = vcpu_to_pi_desc(vcpu);
@@ -431,7 +459,7 @@  struct kvm_x86_ops vt_x86_ops __initdata = {
 
 	.hardware_enable = vt_hardware_enable,
 	.hardware_disable = vt_hardware_disable,
-	.has_emulated_msr = vmx_has_emulated_msr,
+	.has_emulated_msr = vt_has_emulated_msr,
 
 	.is_vm_type_supported = vt_is_vm_type_supported,
 	.vm_size = sizeof(struct kvm_vmx),
@@ -451,8 +479,8 @@  struct kvm_x86_ops vt_x86_ops __initdata = {
 
 	.update_exception_bitmap = vmx_update_exception_bitmap,
 	.get_msr_feature = vmx_get_msr_feature,
-	.get_msr = vmx_get_msr,
-	.set_msr = vmx_set_msr,
+	.get_msr = vt_get_msr,
+	.set_msr = vt_set_msr,
 	.get_segment_base = vmx_get_segment_base,
 	.get_segment = vmx_get_segment,
 	.set_segment = vmx_set_segment,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 50e9352464a9..b820475ce0ab 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1489,6 +1489,69 @@  void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
 	*error_code = 0;
 }
 
+bool tdx_is_emulated_msr(u32 index, bool write)
+{
+	switch (index) {
+	case MSR_IA32_UCODE_REV:
+	case MSR_IA32_ARCH_CAPABILITIES:
+	case MSR_IA32_POWER_CTL:
+	case MSR_MTRRcap:
+	case 0x200 ... 0x26f:
+		/* IA32_MTRR_PHYS{BASE, MASK}, IA32_MTRR_FIX*_* */
+	case MSR_IA32_CR_PAT:
+	case MSR_MTRRdefType:
+	case MSR_IA32_TSC_DEADLINE:
+	case MSR_IA32_MISC_ENABLE:
+	case MSR_KVM_STEAL_TIME:
+	case MSR_KVM_POLL_CONTROL:
+	case MSR_PLATFORM_INFO:
+	case MSR_MISC_FEATURES_ENABLES:
+	case MSR_IA32_MCG_CAP:
+	case MSR_IA32_MCG_STATUS:
+	case MSR_IA32_MCG_CTL:
+	case MSR_IA32_MCG_EXT_CTL:
+	case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
+	case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
+		/* MSR_IA32_MCx_{CTL, STATUS, ADDR, MISC, CTL2} */
+		return true;
+	case APIC_BASE_MSR ... APIC_BASE_MSR + 0xff:
+		/*
+		 * x2APIC registers that are virtualized by the CPU can't be
+		 * emulated, KVM doesn't have access to the virtual APIC page.
+		 */
+		switch (index) {
+		case X2APIC_MSR(APIC_TASKPRI):
+		case X2APIC_MSR(APIC_PROCPRI):
+		case X2APIC_MSR(APIC_EOI):
+		case X2APIC_MSR(APIC_ISR) ... X2APIC_MSR(APIC_ISR + APIC_ISR_NR):
+		case X2APIC_MSR(APIC_TMR) ... X2APIC_MSR(APIC_TMR + APIC_ISR_NR):
+		case X2APIC_MSR(APIC_IRR) ... X2APIC_MSR(APIC_IRR + APIC_ISR_NR):
+			return false;
+		default:
+			return true;
+		}
+	case MSR_IA32_APICBASE:
+	case MSR_EFER:
+		return !write;
+	default:
+		return false;
+	}
+}
+
+int tdx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+{
+	if (tdx_is_emulated_msr(msr->index, false))
+		return kvm_get_msr_common(vcpu, msr);
+	return 1;
+}
+
+int tdx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
+{
+	if (tdx_is_emulated_msr(msr->index, true))
+		return kvm_set_msr_common(vcpu, msr);
+	return 1;
+}
+
 int tdx_dev_ioctl(void __user *argp)
 {
 	struct kvm_tdx_capabilities __user *user_caps;
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index d02619f64b6e..8b9399b154f3 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -162,6 +162,9 @@  void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
 void tdx_inject_nmi(struct kvm_vcpu *vcpu);
 void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason,
 		u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code);
+bool tdx_is_emulated_msr(u32 index, bool write);
+int tdx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
+int tdx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
 
 int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp);
@@ -200,6 +203,9 @@  static inline void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mo
 static inline void tdx_inject_nmi(struct kvm_vcpu *vcpu) {}
 static inline void tdx_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1,
 				     u64 *info2, u32 *intr_info, u32 *error_code) {}
+static inline bool tdx_is_emulated_msr(u32 index, bool write) { return false; }
+static inline int tdx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) { return 1; }
+static inline int tdx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) { return 1; }
 
 static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
 static inline int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { return -EOPNOTSUPP; }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2eacc4929d5d..5304b27f2566 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -85,7 +85,6 @@ 
 #include "trace.h"
 
 #define MAX_IO_MSRS 256
-#define KVM_MAX_MCE_BANKS 32
 
 struct kvm_caps kvm_caps __read_mostly = {
 	.supported_mce_cap = MCG_CTL_P | MCG_SER_P,
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 829d3134c1eb..1d77c39821ae 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,6 +8,8 @@ 
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
 
+#define KVM_MAX_MCE_BANKS 32
+
 struct kvm_caps {
 	/* control of guest tsc rate supported? */
 	bool has_tsc_control;