[RFC,06/33] KVM: x86: hyper-v: Introduce VTL awareness to Hyper-V's PV-IPIs

Message ID 20231108111806.92604-7-nsaenz@amazon.com
State New
Series: KVM: x86: hyperv: Introduce VSM support

Commit Message

Nicolas Saenz Julienne Nov. 8, 2023, 11:17 a.m. UTC
  HVCALL_SEND_IPI and HVCALL_SEND_IPI_EX allow targeting a specific
VTL. Honour the requests.

Signed-off-by: Nicolas Saenz Julienne <nsaenz@amazon.com>
---
 arch/x86/kvm/hyperv.c             | 24 +++++++++++++++++-------
 arch/x86/kvm/trace.h              | 20 ++++++++++++--------
 include/asm-generic/hyperv-tlfs.h |  6 ++++--
 3 files changed, 33 insertions(+), 17 deletions(-)
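
For reference, the VTL selector consumed by kvm_hv_send_ipi() is the
hv_input_vtl union; a sketch of its TLFS layout, assuming the kernel's
existing definition in include/asm-generic/hyperv-tlfs.h:

union hv_input_vtl {
	u8 as_uint8;
	struct {
		u8 target_vtl : 4;	/* VTL the request applies to */
		u8 use_target_vtl : 1;	/* 0: use the caller's active VTL */
		u8 reserved_z : 3;
	};
} __packed;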
  

Comments

Maxim Levitsky Nov. 28, 2023, 7:14 a.m. UTC | #1
On Wed, 2023-11-08 at 11:17 +0000, Nicolas Saenz Julienne wrote:
> HVCALL_SEND_IPI and HVCALL_SEND_IPI_EX allow targeting a specific
> VTL. Honour the requests.
> 
> Signed-off-by: Nicolas Saenz Julienne <nsaenz@amazon.com>
> ---
>  arch/x86/kvm/hyperv.c             | 24 +++++++++++++++++-------
>  arch/x86/kvm/trace.h              | 20 ++++++++++++--------
>  include/asm-generic/hyperv-tlfs.h |  6 ++++--
>  3 files changed, 33 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index d4b1b53ea63d..2cf430f6ddd8 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -2230,7 +2230,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  }
>  
>  static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
> -				    u64 *sparse_banks, u64 valid_bank_mask)
> +				    u64 *sparse_banks, u64 valid_bank_mask, int vtl)
>  {
>  	struct kvm_lapic_irq irq = {
>  		.delivery_mode = APIC_DM_FIXED,
> @@ -2245,6 +2245,9 @@ static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
>  					    valid_bank_mask, sparse_banks))
>  			continue;
>  
> +		if (kvm_hv_get_active_vtl(vcpu) != vtl)
> +			continue;

Do I understand correctly that this is a temporary limitation?
In other words, can a vCPU running in VTL1 send an IPI to a vCPU running in
VTL0, forcing the target vCPU to do an asynchronous switch to VTL1?
I think that this is possible.


If we go with my suggestion to use APIC namespaces and/or multiple VMs per VTL,
then I imagine it like this:

In-kernel Hyper-V IPI emulation works as it does currently, as long as the
IPI's VTL matches the VTL assigned to the vCPU; if it doesn't, it should
result in a userspace VM exit, as in the sketch below.
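
A minimal sketch of that dispatch (hypothetical; kvm_hv_deliver_or_exit() is
not part of this series, and the exit plumbing is only indicative):

static int kvm_hv_deliver_or_exit(struct kvm_vcpu *vcpu,
				  struct kvm_lapic_irq *irq, u8 vtl)
{
	/* Deliver in kernel only when the IPI's VTL matches the one
	 * this vCPU emulates; otherwise bail out to the VMM, which is
	 * the only component that sees every VTL instance. */
	if (kvm_hv_get_active_vtl(vcpu) == vtl)
		return kvm_apic_set_irq(vcpu, irq, NULL);

	vcpu->run->exit_reason = KVM_EXIT_HYPERV;
	return 0;
}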

I do think that we will need KVM to know a vCPU's VTL anyway, however we might
get away with (or have to, if we go with the multiple-VMs approach) an explicit
mapping between all the KVM vCPUs that emulate a single VM's vCPU.

Best regards,
	Maxim Levitsky


> +
>  		/* We fail only when APIC is disabled */
>  		kvm_apic_set_irq(vcpu, &irq, NULL);
>  	}
> @@ -2257,13 +2260,19 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  	struct kvm *kvm = vcpu->kvm;
>  	struct hv_send_ipi_ex send_ipi_ex;
>  	struct hv_send_ipi send_ipi;
> +	union hv_input_vtl *in_vtl;
>  	u64 valid_bank_mask;
>  	u32 vector;
>  	bool all_cpus;
> +	u8 vtl;
> +
> +	/* VTL is at the same offset on both IPI types */
> +	in_vtl = &send_ipi.in_vtl;
> +	vtl = in_vtl->use_target_vtl ? in_vtl->target_vtl : kvm_hv_get_active_vtl(vcpu);
>  
>  	if (hc->code == HVCALL_SEND_IPI) {
>  		if (!hc->fast) {
> -			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
> +			if (unlikely(kvm_vcpu_read_guest(vcpu, hc->ingpa, &send_ipi,
>  						    sizeof(send_ipi))))
>  				return HV_STATUS_INVALID_HYPERCALL_INPUT;
>  			sparse_banks[0] = send_ipi.cpu_mask;
> @@ -2278,10 +2287,10 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  		all_cpus = false;
>  		valid_bank_mask = BIT_ULL(0);
>  
> -		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
> +		trace_kvm_hv_send_ipi(vector, sparse_banks[0], vtl);
>  	} else {
>  		if (!hc->fast) {
> -			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
> +			if (unlikely(kvm_vcpu_read_guest(vcpu, hc->ingpa, &send_ipi_ex,
>  						    sizeof(send_ipi_ex))))
>  				return HV_STATUS_INVALID_HYPERCALL_INPUT;
>  		} else {
> @@ -2292,7 +2301,8 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  
>  		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
>  					 send_ipi_ex.vp_set.format,
> -					 send_ipi_ex.vp_set.valid_bank_mask);
> +					 send_ipi_ex.vp_set.valid_bank_mask,
> +					 vtl);
>  
>  		vector = send_ipi_ex.vector;
>  		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
> @@ -2322,9 +2332,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  		return HV_STATUS_INVALID_HYPERCALL_INPUT;
>  
>  	if (all_cpus)
> -		kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0);
> +		kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0, vtl);
>  	else
> -		kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask);
> +		kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask, vtl);
>  
>  ret_success:
>  	return HV_STATUS_SUCCESS;
> diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
> index 83843379813e..ab8839c47bc7 100644
> --- a/arch/x86/kvm/trace.h
> +++ b/arch/x86/kvm/trace.h
> @@ -1606,42 +1606,46 @@ TRACE_EVENT(kvm_hv_flush_tlb_ex,
>   * Tracepoints for kvm_hv_send_ipi.
>   */
>  TRACE_EVENT(kvm_hv_send_ipi,
> -	TP_PROTO(u32 vector, u64 processor_mask),
> -	TP_ARGS(vector, processor_mask),
> +	TP_PROTO(u32 vector, u64 processor_mask, u8 vtl),
> +	TP_ARGS(vector, processor_mask, vtl),
>  
>  	TP_STRUCT__entry(
>  		__field(u32, vector)
>  		__field(u64, processor_mask)
> +		__field(u8, vtl)
>  	),
>  
>  	TP_fast_assign(
>  		__entry->vector = vector;
>  		__entry->processor_mask = processor_mask;
> +		__entry->vtl = vtl;
>  	),
>  
> -	TP_printk("vector %x processor_mask 0x%llx",
> -		  __entry->vector, __entry->processor_mask)
> +	TP_printk("vector %x processor_mask 0x%llx vtl %d",
> +		  __entry->vector, __entry->processor_mask, __entry->vtl)
>  );
>  
>  TRACE_EVENT(kvm_hv_send_ipi_ex,
> -	TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask),
> -	TP_ARGS(vector, format, valid_bank_mask),
> +	TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask, u8 vtl),
> +	TP_ARGS(vector, format, valid_bank_mask, vtl),
>  
>  	TP_STRUCT__entry(
>  		__field(u32, vector)
>  		__field(u64, format)
>  		__field(u64, valid_bank_mask)
> +		__field(u8, vtl)
>  	),
>  
>  	TP_fast_assign(
>  		__entry->vector = vector;
>  		__entry->format = format;
>  		__entry->valid_bank_mask = valid_bank_mask;
> +		__entry->vtl = vtl;
>  	),
>  
> -	TP_printk("vector %x format %llx valid_bank_mask 0x%llx",
> +	TP_printk("vector %x format %llx valid_bank_mask 0x%llx vtl %d",
>  		  __entry->vector, __entry->format,
> -		  __entry->valid_bank_mask)
> +		  __entry->valid_bank_mask, __entry->vtl)
>  );
>  
>  TRACE_EVENT(kvm_pv_tlb_flush,
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index 0e7643c1ef01..40d7dc793c03 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -424,14 +424,16 @@ struct hv_vpset {
>  /* HvCallSendSyntheticClusterIpi hypercall */
>  struct hv_send_ipi {
>  	u32 vector;
> -	u32 reserved;
> +	union hv_input_vtl in_vtl;
> +	u8 reserved[3];
>  	u64 cpu_mask;
>  } __packed;
>  
>  /* HvCallSendSyntheticClusterIpiEx hypercall */
>  struct hv_send_ipi_ex {
>  	u32 vector;
> -	u32 reserved;
> +	union hv_input_vtl in_vtl;
> +	u8 reserved[3];
>  	struct hv_vpset vp_set;
>  } __packed;
>
  
Nicolas Saenz Julienne Dec. 1, 2023, 4:31 p.m. UTC | #2
On Tue Nov 28, 2023 at 7:14 AM UTC, Maxim Levitsky wrote:
> On Wed, 2023-11-08 at 11:17 +0000, Nicolas Saenz Julienne wrote:
> > HVCALL_SEND_IPI and HVCALL_SEND_IPI_EX allow targeting a specific
> > VTL. Honour the requests.
> >
> > Signed-off-by: Nicolas Saenz Julienne <nsaenz@amazon.com>
> > ---
> >  arch/x86/kvm/hyperv.c             | 24 +++++++++++++++++-------
> >  arch/x86/kvm/trace.h              | 20 ++++++++++++--------
> >  include/asm-generic/hyperv-tlfs.h |  6 ++++--
> >  3 files changed, 33 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> > index d4b1b53ea63d..2cf430f6ddd8 100644
> > --- a/arch/x86/kvm/hyperv.c
> > +++ b/arch/x86/kvm/hyperv.c
> > @@ -2230,7 +2230,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
> >  }
> >
> >  static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
> > -                                 u64 *sparse_banks, u64 valid_bank_mask)
> > +                                 u64 *sparse_banks, u64 valid_bank_mask, int vtl)
> >  {
> >       struct kvm_lapic_irq irq = {
> >               .delivery_mode = APIC_DM_FIXED,
> > @@ -2245,6 +2245,9 @@ static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
> >                                           valid_bank_mask, sparse_banks))
> >                       continue;
> >
> > +             if (kvm_hv_get_active_vtl(vcpu) != vtl)
> > +                     continue;
>
> Do I understand correctly that this is a temporary limitation?
> In other words, can a vCPU running in VTL1 send an IPI to a vCPU running in
> VTL0, forcing the target vCPU to do an asynchronous switch to VTL1?
> I think that this is possible.


The diff is missing some context. See this simplified implementation
(when all_cpus is set in the parent function):

static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector, int vtl)
{
	[...]
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (kvm_hv_get_active_vtl(vcpu) != vtl)
			continue;

		kvm_apic_set_irq(vcpu, &irq, NULL);
	}
}

With the one-vCPU-per-VTL approach, kvm_for_each_vcpu() will iterate
over *all* vCPUs regardless of their VTL. The IPI is targeted at a
specific VTL, hence the need to filter.
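
For reference, a hypothetical reading of kvm_hv_get_active_vtl() under this
one-vCPU-per-VTL model (the helper is introduced earlier in the series; the
field name below is assumed for illustration):

static u8 kvm_hv_get_active_vtl(struct kvm_vcpu *vcpu)
{
	/* Each KVM vCPU models exactly one (VP index, VTL) pair, so
	 * its active VTL is fixed at vCPU creation time. */
	return to_hv_vcpu(vcpu)->vtl;
}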

VTL1 -> VTL0 IPIs are supported and happen (although they are extremely
rare).

Nicolas
  
Maxim Levitsky Dec. 5, 2023, 3:02 p.m. UTC | #3
On Fri, 2023-12-01 at 16:31 +0000, Nicolas Saenz Julienne wrote:
> On Tue Nov 28, 2023 at 7:14 AM UTC, Maxim Levitsky wrote:
> > On Wed, 2023-11-08 at 11:17 +0000, Nicolas Saenz Julienne wrote:
> > > HVCALL_SEND_IPI and HVCALL_SEND_IPI_EX allow targeting a specific
> > > VTL. Honour the requests.
> > > 
> > > Signed-off-by: Nicolas Saenz Julienne <nsaenz@amazon.com>
> > > ---
> > >  arch/x86/kvm/hyperv.c             | 24 +++++++++++++++++-------
> > >  arch/x86/kvm/trace.h              | 20 ++++++++++++--------
> > >  include/asm-generic/hyperv-tlfs.h |  6 ++++--
> > >  3 files changed, 33 insertions(+), 17 deletions(-)
> > > 
> > > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> > > index d4b1b53ea63d..2cf430f6ddd8 100644
> > > --- a/arch/x86/kvm/hyperv.c
> > > +++ b/arch/x86/kvm/hyperv.c
> > > @@ -2230,7 +2230,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
> > >  }
> > > 
> > >  static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
> > > -                                 u64 *sparse_banks, u64 valid_bank_mask)
> > > +                                 u64 *sparse_banks, u64 valid_bank_mask, int vtl)
> > >  {
> > >       struct kvm_lapic_irq irq = {
> > >               .delivery_mode = APIC_DM_FIXED,
> > > @@ -2245,6 +2245,9 @@ static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
> > >                                           valid_bank_mask, sparse_banks))
> > >                       continue;
> > > 
> > > +             if (kvm_hv_get_active_vtl(vcpu) != vtl)
> > > +                     continue;
> > 
> > Do I understand correctly that this is a temporary limitation?
> > In other words, can a vCPU running in VTL1 send an IPI to a vCPU running in
> > VTL0, forcing the target vCPU to do an asynchronous switch to VTL1?
> > I think that this is possible.
> 
> The diff is missing some context. See this simplified implementation
> (when all_cpus is set in the parent function):
> 
> static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector, int vtl)
> {
> 	[...]
> 	kvm_for_each_vcpu(i, vcpu, kvm) {
> 		if (kvm_hv_get_active_vtl(vcpu) != vtl)
> 			continue;
> 
> 		kvm_apic_set_irq(vcpu, &irq, NULL);
> 	}
> }
> 
> With the one-vCPU-per-VTL approach, kvm_for_each_vcpu() will iterate
> over *all* vCPUs regardless of their VTL. The IPI is targeted at a
> specific VTL, hence the need to filter.
> 
> VTL1 -> VTL0 IPIs are supported and happen (although they are extremely
> rare).

Makes sense now, thanks!

Best regards,
	Maxim Levitsky

> 
> Nicolas
>
  

Patch

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index d4b1b53ea63d..2cf430f6ddd8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2230,7 +2230,7 @@  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 }
 
 static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
-				    u64 *sparse_banks, u64 valid_bank_mask)
+				    u64 *sparse_banks, u64 valid_bank_mask, int vtl)
 {
 	struct kvm_lapic_irq irq = {
 		.delivery_mode = APIC_DM_FIXED,
@@ -2245,6 +2245,9 @@  static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
 					    valid_bank_mask, sparse_banks))
 			continue;
 
+		if (kvm_hv_get_active_vtl(vcpu) != vtl)
+			continue;
+
 		/* We fail only when APIC is disabled */
 		kvm_apic_set_irq(vcpu, &irq, NULL);
 	}
@@ -2257,13 +2260,19 @@  static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
 	struct hv_send_ipi send_ipi;
+	union hv_input_vtl *in_vtl;
 	u64 valid_bank_mask;
 	u32 vector;
 	bool all_cpus;
+	u8 vtl;
+
+	/* VTL is at the same offset on both IPI types */
+	in_vtl = &send_ipi.in_vtl;
+	vtl = in_vtl->use_target_vtl ? in_vtl->target_vtl : kvm_hv_get_active_vtl(vcpu);
 
 	if (hc->code == HVCALL_SEND_IPI) {
 		if (!hc->fast) {
-			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
+			if (unlikely(kvm_vcpu_read_guest(vcpu, hc->ingpa, &send_ipi,
 						    sizeof(send_ipi))))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
 			sparse_banks[0] = send_ipi.cpu_mask;
@@ -2278,10 +2287,10 @@  static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		all_cpus = false;
 		valid_bank_mask = BIT_ULL(0);
 
-		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
+		trace_kvm_hv_send_ipi(vector, sparse_banks[0], vtl);
 	} else {
 		if (!hc->fast) {
-			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
+			if (unlikely(kvm_vcpu_read_guest(vcpu, hc->ingpa, &send_ipi_ex,
 						    sizeof(send_ipi_ex))))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
 		} else {
@@ -2292,7 +2301,8 @@  static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 
 		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
 					 send_ipi_ex.vp_set.format,
-					 send_ipi_ex.vp_set.valid_bank_mask);
+					 send_ipi_ex.vp_set.valid_bank_mask,
+					 vtl);
 
 		vector = send_ipi_ex.vector;
 		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
@@ -2322,9 +2332,9 @@  static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
 	if (all_cpus)
-		kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0);
+		kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0, vtl);
 	else
-		kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask);
+		kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask, vtl);
 
 ret_success:
 	return HV_STATUS_SUCCESS;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 83843379813e..ab8839c47bc7 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1606,42 +1606,46 @@  TRACE_EVENT(kvm_hv_flush_tlb_ex,
  * Tracepoints for kvm_hv_send_ipi.
  */
 TRACE_EVENT(kvm_hv_send_ipi,
-	TP_PROTO(u32 vector, u64 processor_mask),
-	TP_ARGS(vector, processor_mask),
+	TP_PROTO(u32 vector, u64 processor_mask, u8 vtl),
+	TP_ARGS(vector, processor_mask, vtl),
 
 	TP_STRUCT__entry(
 		__field(u32, vector)
 		__field(u64, processor_mask)
+		__field(u8, vtl)
 	),
 
 	TP_fast_assign(
 		__entry->vector = vector;
 		__entry->processor_mask = processor_mask;
+		__entry->vtl = vtl;
 	),
 
-	TP_printk("vector %x processor_mask 0x%llx",
-		  __entry->vector, __entry->processor_mask)
+	TP_printk("vector %x processor_mask 0x%llx vtl %d",
+		  __entry->vector, __entry->processor_mask, __entry->vtl)
 );
 
 TRACE_EVENT(kvm_hv_send_ipi_ex,
-	TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask),
-	TP_ARGS(vector, format, valid_bank_mask),
+	TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask, u8 vtl),
+	TP_ARGS(vector, format, valid_bank_mask, vtl),
 
 	TP_STRUCT__entry(
 		__field(u32, vector)
 		__field(u64, format)
 		__field(u64, valid_bank_mask)
+		__field(u8, vtl)
 	),
 
 	TP_fast_assign(
 		__entry->vector = vector;
 		__entry->format = format;
 		__entry->valid_bank_mask = valid_bank_mask;
+		__entry->vtl = vtl;
 	),
 
-	TP_printk("vector %x format %llx valid_bank_mask 0x%llx",
+	TP_printk("vector %x format %llx valid_bank_mask 0x%llx vtl %d",
 		  __entry->vector, __entry->format,
-		  __entry->valid_bank_mask)
+		  __entry->valid_bank_mask, __entry->vtl)
 );
 
 TRACE_EVENT(kvm_pv_tlb_flush,
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 0e7643c1ef01..40d7dc793c03 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -424,14 +424,16 @@  struct hv_vpset {
 /* HvCallSendSyntheticClusterIpi hypercall */
 struct hv_send_ipi {
 	u32 vector;
-	u32 reserved;
+	union hv_input_vtl in_vtl;
+	u8 reserved[3];
 	u64 cpu_mask;
 } __packed;
 
 /* HvCallSendSyntheticClusterIpiEx hypercall */
 struct hv_send_ipi_ex {
 	u32 vector;
-	u32 reserved;
+	union hv_input_vtl in_vtl;
+	u8 reserved[3];
 	struct hv_vpset vp_set;
 } __packed;
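
As an aside, the "VTL is at the same offset on both IPI types" assumption in
kvm_hv_send_ipi() can be verified at compile time; a minimal sketch, not part
of this patch:

#include <linux/build_bug.h>
#include <linux/stddef.h>

/* in_vtl sits right after the 32-bit vector in both request layouts,
 * which is what lets kvm_hv_send_ipi() read it through &send_ipi.in_vtl
 * before knowing which hypercall variant it is handling. */
static_assert(offsetof(struct hv_send_ipi, in_vtl) ==
	      offsetof(struct hv_send_ipi_ex, in_vtl));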