From: Isaku Yamahata <isaku.yamahata@intel.com>
As TDX will use posted_interrupt.c, the use of struct vcpu_vmx is a
blocker. Because the members of struct pi_desc pi_desc and struct
list_head pi_wakeup_list are only used in posted_interrupt.c, introduce
common structure, struct vcpu_pi, make vcpu_vmx and vcpu_tdx has same
layout in the top of structure.
To minimize the diff size, avoid code conversion like,
vmx->pi_desc => vmx->common->pi_desc. Instead add compile time check
if the layout is expected.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
arch/x86/kvm/vmx/posted_intr.c | 41 ++++++++++++++++++++++++++--------
arch/x86/kvm/vmx/posted_intr.h | 11 +++++++++
arch/x86/kvm/vmx/tdx.c | 1 +
arch/x86/kvm/vmx/tdx.h | 8 +++++++
arch/x86/kvm/vmx/vmx.h | 14 +++++++-----
5 files changed, 60 insertions(+), 15 deletions(-)
On 2/26/2024 4:26 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> As TDX will use posted_interrupt.c, the use of struct vcpu_vmx is a
> blocker. Because the members of
Extra "of"
> struct pi_desc pi_desc and struct
> list_head pi_wakeup_list are only used in posted_interrupt.c, introduce
> common structure, struct vcpu_pi, make vcpu_vmx and vcpu_tdx has same
> layout in the top of structure.
>
> To minimize the diff size, avoid code conversion like,
> vmx->pi_desc => vmx->common->pi_desc. Instead add compile time check
> if the layout is expected.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> arch/x86/kvm/vmx/posted_intr.c | 41 ++++++++++++++++++++++++++--------
> arch/x86/kvm/vmx/posted_intr.h | 11 +++++++++
> arch/x86/kvm/vmx/tdx.c | 1 +
> arch/x86/kvm/vmx/tdx.h | 8 +++++++
> arch/x86/kvm/vmx/vmx.h | 14 +++++++-----
> 5 files changed, 60 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c
> index af662312fd07..b66add9da0f3 100644
> --- a/arch/x86/kvm/vmx/posted_intr.c
> +++ b/arch/x86/kvm/vmx/posted_intr.c
> @@ -11,6 +11,7 @@
> #include "posted_intr.h"
> #include "trace.h"
> #include "vmx.h"
> +#include "tdx.h"
>
> /*
> * Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler()
> @@ -31,9 +32,29 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
> */
> static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
>
> +/*
> + * The layout of the head of struct vcpu_vmx and struct vcpu_tdx must match with
> + * struct vcpu_pi.
> + */
> +static_assert(offsetof(struct vcpu_pi, pi_desc) ==
> + offsetof(struct vcpu_vmx, pi_desc));
> +static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
> + offsetof(struct vcpu_vmx, pi_wakeup_list));
> +#ifdef CONFIG_INTEL_TDX_HOST
> +static_assert(offsetof(struct vcpu_pi, pi_desc) ==
> + offsetof(struct vcpu_tdx, pi_desc));
> +static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
> + offsetof(struct vcpu_tdx, pi_wakeup_list));
> +#endif
> +
> +static inline struct vcpu_pi *vcpu_to_pi(struct kvm_vcpu *vcpu)
> +{
> + return (struct vcpu_pi *)vcpu;
> +}
> +
> static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
> {
> - return &(to_vmx(vcpu)->pi_desc);
> + return &vcpu_to_pi(vcpu)->pi_desc;
> }
>
> static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
> @@ -52,8 +73,8 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
>
> void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
> {
> - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> - struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
> + struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
> struct pi_desc old, new;
> unsigned long flags;
> unsigned int dest;
> @@ -90,7 +111,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
> */
> if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
> raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> - list_del(&vmx->pi_wakeup_list);
> + list_del(&vcpu_pi->pi_wakeup_list);
> raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> }
>
> @@ -145,15 +166,15 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
> */
> static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
> {
> - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
> - struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
> + struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
> struct pi_desc old, new;
> unsigned long flags;
>
> local_irq_save(flags);
>
> raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
> - list_add_tail(&vmx->pi_wakeup_list,
> + list_add_tail(&vcpu_pi->pi_wakeup_list,
> &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
> raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
>
> @@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
> * notification vector is switched to the one that calls
> * back to the pi_wakeup_handler() function.
> */
> - return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
> + return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
> + vmx_can_use_vtd_pi(vcpu->kvm);
> }
>
> void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> @@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
> if (!vmx_needs_pi_wakeup(vcpu))
> return;
>
> - if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
> + if (kvm_vcpu_is_blocking(vcpu) &&
> + (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
> pi_enable_wakeup_handler(vcpu);
>
> /*
> diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h
> index 26992076552e..2fe8222308b2 100644
> --- a/arch/x86/kvm/vmx/posted_intr.h
> +++ b/arch/x86/kvm/vmx/posted_intr.h
> @@ -94,6 +94,17 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
> (unsigned long *)&pi_desc->control);
> }
>
> +struct vcpu_pi {
> + struct kvm_vcpu vcpu;
> +
> + /* Posted interrupt descriptor */
> + struct pi_desc pi_desc;
> +
> + /* Used if this vCPU is waiting for PI notification wakeup. */
> + struct list_head pi_wakeup_list;
> + /* Until here common layout betwwn vcpu_vmx and vcpu_tdx. */
s/betwwn/between
Also, in pi_wakeup_handler(), it is still using struct vcpu_vmx, but it
could
be vcpu_tdx.
Functionally it is OK, however, since you have added vcpu_pi, should it use
vcpu_pi instead of vcpu_vmx in pi_wakeup_handler()?
> +};
> +
> void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
> void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
> void pi_wakeup_handler(void);
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index a5b52aa6d153..1da58c36217c 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -584,6 +584,7 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
>
> fpstate_set_confidential(&vcpu->arch.guest_fpu);
> vcpu->arch.apic->guest_apic_protected = true;
> + INIT_LIST_HEAD(&tdx->pi_wakeup_list);
>
> vcpu->arch.efer = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
>
> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> index 7f8c78f06508..eaffa7384725 100644
> --- a/arch/x86/kvm/vmx/tdx.h
> +++ b/arch/x86/kvm/vmx/tdx.h
> @@ -4,6 +4,7 @@
>
> #ifdef CONFIG_INTEL_TDX_HOST
>
> +#include "posted_intr.h"
> #include "pmu_intel.h"
> #include "tdx_ops.h"
>
> @@ -69,6 +70,13 @@ union tdx_exit_reason {
> struct vcpu_tdx {
> struct kvm_vcpu vcpu;
>
> + /* Posted interrupt descriptor */
> + struct pi_desc pi_desc;
> +
> + /* Used if this vCPU is waiting for PI notification wakeup. */
> + struct list_head pi_wakeup_list;
> + /* Until here same layout to struct vcpu_pi. */
> +
> unsigned long tdvpr_pa;
> unsigned long *tdvpx_pa;
> bool td_vcpu_created;
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 79ff54f08fee..634a9a250b95 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -235,6 +235,14 @@ struct nested_vmx {
>
> struct vcpu_vmx {
> struct kvm_vcpu vcpu;
> +
> + /* Posted interrupt descriptor */
> + struct pi_desc pi_desc;
> +
> + /* Used if this vCPU is waiting for PI notification wakeup. */
> + struct list_head pi_wakeup_list;
> + /* Until here same layout to struct vcpu_pi. */
> +
> u8 fail;
> u8 x2apic_msr_bitmap_mode;
>
> @@ -304,12 +312,6 @@ struct vcpu_vmx {
>
> union vmx_exit_reason exit_reason;
>
> - /* Posted interrupt descriptor */
> - struct pi_desc pi_desc;
> -
> - /* Used if this vCPU is waiting for PI notification wakeup. */
> - struct list_head pi_wakeup_list;
> -
> /* Support for a guest hypervisor (nested VMX) */
> struct nested_vmx nested;
>
@@ -11,6 +11,7 @@
#include "posted_intr.h"
#include "trace.h"
#include "vmx.h"
+#include "tdx.h"
/*
* Maintain a per-CPU list of vCPUs that need to be awakened by wakeup_handler()
@@ -31,9 +32,29 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
*/
static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
+/*
+ * The layout of the head of struct vcpu_vmx and struct vcpu_tdx must match with
+ * struct vcpu_pi.
+ */
+static_assert(offsetof(struct vcpu_pi, pi_desc) ==
+ offsetof(struct vcpu_vmx, pi_desc));
+static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
+ offsetof(struct vcpu_vmx, pi_wakeup_list));
+#ifdef CONFIG_INTEL_TDX_HOST
+static_assert(offsetof(struct vcpu_pi, pi_desc) ==
+ offsetof(struct vcpu_tdx, pi_desc));
+static_assert(offsetof(struct vcpu_pi, pi_wakeup_list) ==
+ offsetof(struct vcpu_tdx, pi_wakeup_list));
+#endif
+
+static inline struct vcpu_pi *vcpu_to_pi(struct kvm_vcpu *vcpu)
+{
+ return (struct vcpu_pi *)vcpu;
+}
+
static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
{
- return &(to_vmx(vcpu)->pi_desc);
+ return &vcpu_to_pi(vcpu)->pi_desc;
}
static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
@@ -52,8 +73,8 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 *pold, u64 new)
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
+ struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
struct pi_desc old, new;
unsigned long flags;
unsigned int dest;
@@ -90,7 +111,7 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
*/
if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
- list_del(&vmx->pi_wakeup_list);
+ list_del(&vcpu_pi->pi_wakeup_list);
raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
}
@@ -145,15 +166,15 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm)
*/
static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct vcpu_pi *vcpu_pi = vcpu_to_pi(vcpu);
+ struct pi_desc *pi_desc = &vcpu_pi->pi_desc;
struct pi_desc old, new;
unsigned long flags;
local_irq_save(flags);
raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
- list_add_tail(&vmx->pi_wakeup_list,
+ list_add_tail(&vcpu_pi->pi_wakeup_list,
&per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
@@ -190,7 +211,8 @@ static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
* notification vector is switched to the one that calls
* back to the pi_wakeup_handler() function.
*/
- return vmx_can_use_ipiv(vcpu) || vmx_can_use_vtd_pi(vcpu->kvm);
+ return (vmx_can_use_ipiv(vcpu) && !is_td_vcpu(vcpu)) ||
+ vmx_can_use_vtd_pi(vcpu->kvm);
}
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
@@ -200,7 +222,8 @@ void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
if (!vmx_needs_pi_wakeup(vcpu))
return;
- if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu))
+ if (kvm_vcpu_is_blocking(vcpu) &&
+ (is_td_vcpu(vcpu) || !vmx_interrupt_blocked(vcpu)))
pi_enable_wakeup_handler(vcpu);
/*
@@ -94,6 +94,17 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+struct vcpu_pi {
+ struct kvm_vcpu vcpu;
+
+ /* Posted interrupt descriptor */
+ struct pi_desc pi_desc;
+
+ /* Used if this vCPU is waiting for PI notification wakeup. */
+ struct list_head pi_wakeup_list;
+ /* Until here common layout betwwn vcpu_vmx and vcpu_tdx. */
+};
+
void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu);
void pi_wakeup_handler(void);
@@ -584,6 +584,7 @@ int tdx_vcpu_create(struct kvm_vcpu *vcpu)
fpstate_set_confidential(&vcpu->arch.guest_fpu);
vcpu->arch.apic->guest_apic_protected = true;
+ INIT_LIST_HEAD(&tdx->pi_wakeup_list);
vcpu->arch.efer = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
@@ -4,6 +4,7 @@
#ifdef CONFIG_INTEL_TDX_HOST
+#include "posted_intr.h"
#include "pmu_intel.h"
#include "tdx_ops.h"
@@ -69,6 +70,13 @@ union tdx_exit_reason {
struct vcpu_tdx {
struct kvm_vcpu vcpu;
+ /* Posted interrupt descriptor */
+ struct pi_desc pi_desc;
+
+ /* Used if this vCPU is waiting for PI notification wakeup. */
+ struct list_head pi_wakeup_list;
+ /* Until here same layout to struct vcpu_pi. */
+
unsigned long tdvpr_pa;
unsigned long *tdvpx_pa;
bool td_vcpu_created;
@@ -235,6 +235,14 @@ struct nested_vmx {
struct vcpu_vmx {
struct kvm_vcpu vcpu;
+
+ /* Posted interrupt descriptor */
+ struct pi_desc pi_desc;
+
+ /* Used if this vCPU is waiting for PI notification wakeup. */
+ struct list_head pi_wakeup_list;
+ /* Until here same layout to struct vcpu_pi. */
+
u8 fail;
u8 x2apic_msr_bitmap_mode;
@@ -304,12 +312,6 @@ struct vcpu_vmx {
union vmx_exit_reason exit_reason;
- /* Posted interrupt descriptor */
- struct pi_desc pi_desc;
-
- /* Used if this vCPU is waiting for PI notification wakeup. */
- struct list_head pi_wakeup_list;
-
/* Support for a guest hypervisor (nested VMX) */
struct nested_vmx nested;