[v2,08/29] LoongArch: KVM: Implement vcpu handle exit interface

Message ID 20230220065735.1282809-9-zhaotianrui@loongson.cn
State New
Series Add KVM LoongArch support

Commit Message

zhaotianrui Feb. 20, 2023, 6:57 a.m. UTC
  Implement the vcpu handle exit interface: get the exit code from the
ESTAT register and use the KVM exception vector to handle it.

Signed-off-by: Tianrui Zhao <zhaotianrui@loongson.cn>
---
 arch/loongarch/kvm/vcpu.c | 86 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
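
For orientation, the dispatch described in the commit message boils down to
something like the following sketch (the handler table name, the handler type
and the EXCCODE_INT_START bound are assumptions for illustration, not code
taken from this patch):

	typedef int (*exit_handle_fn)(struct kvm_vcpu *vcpu);

	/* Sketch only: one handler per ESTAT exception code. */
	static exit_handle_fn kvm_fault_table[EXCCODE_INT_START];

	static int _kvm_handle_fault(struct kvm_vcpu *vcpu, int exccode)
	{
		exit_handle_fn fn = kvm_fault_table[exccode];

		/* Bail out to the host for codes without a handler. */
		return fn ? fn(vcpu) : RESUME_HOST;
	}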
  

Comments

Paolo Bonzini Feb. 20, 2023, 5:46 p.m. UTC | #1
On 2/20/23 07:57, Tianrui Zhao wrote:
> +	if (ret == RESUME_GUEST)
> +		kvm_acquire_timer(vcpu);
> +
> +	if (!(ret & RESUME_HOST)) {
> +		_kvm_deliver_intr(vcpu);
> +		/* Only check for signals if not already exiting to userspace */
> +		if (signal_pending(current)) {
> +			run->exit_reason = KVM_EXIT_INTR;
> +			ret = (-EINTR << 2) | RESUME_HOST;
> +			++vcpu->stat.signal_exits;
> +			trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
> +		}
> +	}
> +
> +	if (ret == RESUME_GUEST) {
> +		trace_kvm_reenter(vcpu);
> +
> +		/*
> +		 * Make sure the read of VCPU requests in vcpu_reenter()
> +		 * callback is not reordered ahead of the write to vcpu->mode,
> +		 * or we could miss a TLB flush request while the requester sees
> +		 * the VCPU as outside of guest mode and not needing an IPI.
> +		 */
> +		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
> +
> +		cpu = smp_processor_id();
> +		_kvm_check_requests(vcpu, cpu);
> +		_kvm_check_vmid(vcpu, cpu);
> +		vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
> +
> +		/*
> +		 * If the FPU is enabled (i.e. the guest's FPU context
> +		 * is live), restore FCSR0.
> +		 */
> +		if (_kvm_guest_has_fpu(&vcpu->arch) &&
> +			read_csr_euen() & (CSR_EUEN_FPEN)) {
> +			kvm_restore_fcsr(&vcpu->arch.fpu);
> +		}
> +	}

Please avoid copying code from arch/mips/kvm since it's already pretty ugly.
  
Paolo Bonzini Feb. 20, 2023, 6:45 p.m. UTC | #2
On 2/20/23 07:57, Tianrui Zhao wrote:
> + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)

As far as I can see, RESUME_FLAG_NV does not exist anymore and this is 
just copied from arch/mips?

You can keep RESUME_HOST/RESUME_GUEST for the individual functions, but 
here please make it just "1" for resume guest, and "<= 0" for resume 
host.  This is easy enough to check from assembly and removes the srai by 2.
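
Concretely, with the suggested encoding the caller-side check could look like
this (a sketch of the convention, not code from the series):

	ret = _kvm_handle_exit(run, vcpu);
	if (ret <= 0)		/* 0 or -errno: exit to userspace */
		return ret;
	/* ret == 1: re-enter the guest, no "srai ret, ret, 2" needed */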

> +static int _kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
> +{
> +	unsigned long exst = vcpu->arch.host_estat;
> +	u32 intr = exst & 0x1fff; /* ignore NMI */
> +	u32 exccode = (exst & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
> +	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
> +	int ret = RESUME_GUEST, cpu;
> +
> +	vcpu->mode = OUTSIDE_GUEST_MODE;
> +
> +	/* Set a default exit reason */
> +	run->exit_reason = KVM_EXIT_UNKNOWN;
> +	run->ready_for_interrupt_injection = 1;
> +
> +	/*
> +	 * Set the appropriate status bits based on host CPU features,
> +	 * before we hit the scheduler
> +	 */

Stale comment?

> +	local_irq_enable();

Please add guest_state_exit_irqoff() here.
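
That is, the exit path would leave the guest context-tracking section while
interrupts are still disabled, roughly (a minimal sketch):

	/* Mark the vCPU as back in host context before enabling IRQs. */
	guest_state_exit_irqoff();
	local_irq_enable();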

> +	kvm_debug("%s: exst: %lx, PC: %p, kvm_run: %p, kvm_vcpu: %p\n",
> +			__func__, exst, opc, run, vcpu);

Please add the information to the kvm_exit tracepoint (thus also 
removing variables such as "exst" or "opc" from this function) instead 
of calling kvm_debug().
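
One possible shape for such a tracepoint, carrying the PC and ESTAT as fields
(the field names are illustrative assumptions, not taken from the series):

	TRACE_EVENT(kvm_exit,
		TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
		TP_ARGS(vcpu, reason),
		TP_STRUCT__entry(
			__field(unsigned long, pc)
			__field(unsigned long, estat)
			__field(unsigned int, reason)
		),
		TP_fast_assign(
			__entry->pc = vcpu->arch.pc;
			__entry->estat = vcpu->arch.host_estat;
			__entry->reason = reason;
		),
		TP_printk("PC 0x%lx ESTAT 0x%lx reason %u",
			  __entry->pc, __entry->estat, __entry->reason)
	);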

> +	trace_kvm_exit(vcpu, exccode);
> +	if (exccode) {
> +		ret = _kvm_handle_fault(vcpu, exccode);
> +	} else {
> +		WARN(!intr, "suspicious vm exiting");
> +		++vcpu->stat.int_exits;
> +
> +		if (need_resched())
> +			cond_resched();

This "if" is not necessary because there is already a cond_resched() below.

> +		ret = RESUME_GUEST;

This "ret" is not necessary because "ret" is already initialized to 
RESUME_GUEST above, you can either remove it or remove the initializer.

> +	}
> +
> +	cond_resched();
> +	local_irq_disable();

At this point, ret is either RESUME_GUEST or RESUME_HOST.  So, the "if"s 
below are either all taken or all not taken, and most of this code:

	kvm_acquire_timer(vcpu);
	_kvm_deliver_intr(vcpu);

	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		ret = (-EINTR << 2) | RESUME_HOST;
		++vcpu->stat.signal_exits;
		// no need for a tracepoint here
		// trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
	}

	trace_kvm_reenter(vcpu);

	/*
	 * Make sure the read of VCPU requests in vcpu_reenter()
	 * callback is not reordered ahead of the write to vcpu->mode,
	 * or we could miss a TLB flush request while the requester sees
	 * the VCPU as outside of guest mode and not needing an IPI.
	 */
	smp_store_mb(vcpu->mode, IN_GUEST_MODE);

	cpu = smp_processor_id();
	_kvm_check_requests(vcpu, cpu);
	_kvm_check_vmid(vcpu, cpu);
	vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);

	/*
	 * If the FPU is enabled (i.e. the guest's FPU context
	 * is live), restore FCSR0.
	 */
	if (_kvm_guest_has_fpu(&vcpu->arch) &&
		read_csr_euen() & (CSR_EUEN_FPEN)) {
		kvm_restore_fcsr(&vcpu->arch.fpu);
	}

(all except for the "if (signal_pending(current))" and the final "if") 
is pretty much duplicated with kvm_arch_vcpu_ioctl_run(); the remaining 
code can also be done from kvm_arch_vcpu_ioctl_run(), the cost is small. 
  Please move it to a separate function, for example:

int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
{
	if (signal_pending(current)) {
		run->exit_reason = KVM_EXIT_INTR;
		++vcpu->stat.signal_exits;
		return -EINTR;
	}

	kvm_acquire_timer(vcpu);
	_kvm_deliver_intr(vcpu);

	...

	if (_kvm_guest_has_fpu(&vcpu->arch) &&
		read_csr_euen() & (CSR_EUEN_FPEN)) {
		kvm_restore_fcsr(&vcpu->arch.fpu);
	}
	return 1;
}

Call it from _kvm_handle_exit():

	if (ret == RESUME_HOST)
		return 0;

	r = kvm_pre_enter_guest(vcpu);
	if (r > 0) {
		trace_kvm_reenter(vcpu);
		guest_state_enter_irqoff();
	}

	return r;

and from kvm_arch_vcpu_ioctl_run():

	local_irq_disable();
	guest_timing_enter_irqoff();
	r = kvm_pre_enter_guest(vcpu);
	if (r > 0) {
		trace_kvm_enter(vcpu);
		/*
		 * This should actually not be a function pointer, but
		 * just for clarity
		 */
		guest_state_enter_irqoff();
		r = vcpu->arch.vcpu_run(run, vcpu);
		/* guest_state_exit_irqoff() already done.  */
		trace_kvm_out(vcpu);
	}
	guest_timing_exit_irqoff();
	local_irq_enable();

out:
	kvm_sigset_deactivate(vcpu);

	vcpu_put(vcpu);
	return r;

Paolo

> +	if (ret == RESUME_GUEST)
> +		kvm_acquire_timer(vcpu);
> +
> +	if (!(ret & RESUME_HOST)) {
> +		_kvm_deliver_intr(vcpu);
> +		/* Only check for signals if not already exiting to userspace */
> +		if (signal_pending(current)) {
> +			run->exit_reason = KVM_EXIT_INTR;
> +			ret = (-EINTR << 2) | RESUME_HOST;
> +			++vcpu->stat.signal_exits;
> +			trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
> +		}
> +	}
> +
> +	if (ret == RESUME_GUEST) {
> +		trace_kvm_reenter(vcpu);
> +
> +		/*
> +		 * Make sure the read of VCPU requests in vcpu_reenter()
> +		 * callback is not reordered ahead of the write to vcpu->mode,
> +		 * or we could miss a TLB flush request while the requester sees
> +		 * the VCPU as outside of guest mode and not needing an IPI.
> +		 */
> +		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
> +
> +		cpu = smp_processor_id();
> +		_kvm_check_requests(vcpu, cpu);
> +		_kvm_check_vmid(vcpu, cpu);
> +		vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
> +
> +		/*
> +		 * If FPU are enabled (i.e. the guest's FPU context
> +		 * is live), restore FCSR0.
> +		 */
> +		if (_kvm_guest_has_fpu(&vcpu->arch) &&
> +			read_csr_euen() & (CSR_EUEN_FPEN)) {
> +			kvm_restore_fcsr(&vcpu->arch.fpu);
> +		}
> +	}
> +
> +	return ret;
> +}
> +
>   int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>   {
>   	int i;
  
zhaotianrui Feb. 21, 2023, 3:17 a.m. UTC | #3
On 2023/02/21 02:45, Paolo Bonzini wrote:
> On 2/20/23 07:57, Tianrui Zhao wrote:
>> + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | 
>> RESUME_FLAG_NV)
>
> As far as I can see, RESUME_FLAG_NV does not exist anymore and this is 
> just copied from arch/mips?
>
> You can keep RESUME_HOST/RESUME_GUEST for the individual functions, 
> but here please make it just "1" for resume guest, and "<= 0" for 
> resume host.  This is easy enough to check from assembly and removes 
> the srai by 2.
>
>> +static int _kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
>> +{
>> +    unsigned long exst = vcpu->arch.host_estat;
>> +    u32 intr = exst & 0x1fff; /* ignore NMI */
>> +    u32 exccode = (exst & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
>> +    u32 __user *opc = (u32 __user *) vcpu->arch.pc;
>> +    int ret = RESUME_GUEST, cpu;
>> +
>> +    vcpu->mode = OUTSIDE_GUEST_MODE;
>> +
>> +    /* Set a default exit reason */
>> +    run->exit_reason = KVM_EXIT_UNKNOWN;
>> +    run->ready_for_interrupt_injection = 1;
>> +
>> +    /*
>> +     * Set the appropriate status bits based on host CPU features,
>> +     * before we hit the scheduler
>> +     */
>
> Stale comment?

I will remove this comment.

Thanks
Tianrui Zhao

>
>> +    local_irq_enable();
>
> Please add guest_state_exit_irqoff() here.

I will add this function here.

Thanks
Tianrui Zhao

>
>> +    kvm_debug("%s: exst: %lx, PC: %p, kvm_run: %p, kvm_vcpu: %p\n",
>> +            __func__, exst, opc, run, vcpu);
>
> Please add the information to the kvm_exit tracepoint (thus also 
> removing variables such as "exst" or "opc" from this function) instead 
> of calling kvm_debug().

Ok, I will fix the kvm_exit tracepoint.

Thanks
Tianrui Zhao

>
>> +    trace_kvm_exit(vcpu, exccode);
>> +    if (exccode) {
>> +        ret = _kvm_handle_fault(vcpu, exccode);
>> +    } else {
>> +        WARN(!intr, "suspicious vm exiting");
>> +        ++vcpu->stat.int_exits;
>> +
>> +        if (need_resched())
>> +            cond_resched();
>
> This "if" is not necessary because there is already a cond_resched() 
> below.

Thanks, I will remove this redundant cond_resched() call.

Thanks
Tianrui Zhao

>
>> +        ret = RESUME_GUEST;
>
> This "ret" is not necessary because "ret" is already initialized to 
> RESUME_GUEST above, you can either remove it or remove the initializer.

Ok, I will remove this "ret".

Thanks
Tianrui Zhao

>
>> +    }
>> +
>> +    cond_resched();
>> +    local_irq_disable();
>
> At this point, ret is either RESUME_GUEST or RESUME_HOST.  So, the 
> "if"s below are either all taken or all not taken, and most of this code:
>
>     kvm_acquire_timer(vcpu);
>     _kvm_deliver_intr(vcpu);
>
>     if (signal_pending(current)) {
>         run->exit_reason = KVM_EXIT_INTR;
>         ret = (-EINTR << 2) | RESUME_HOST;
>         ++vcpu->stat.signal_exits;
>         // no need for a tracepoint here
>         // trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
>     }
>
>     trace_kvm_reenter(vcpu);
>
>     /*
>      * Make sure the read of VCPU requests in vcpu_reenter()
>      * callback is not reordered ahead of the write to vcpu->mode,
>      * or we could miss a TLB flush request while the requester sees
>      * the VCPU as outside of guest mode and not needing an IPI.
>      */
>     smp_store_mb(vcpu->mode, IN_GUEST_MODE);
>
>     cpu = smp_processor_id();
>     _kvm_check_requests(vcpu, cpu);
>     _kvm_check_vmid(vcpu, cpu);
>     vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
>
>     /*
>      * If the FPU is enabled (i.e. the guest's FPU context
>      * is live), restore FCSR0.
>      */
>     if (_kvm_guest_has_fpu(&vcpu->arch) &&
>         read_csr_euen() & (CSR_EUEN_FPEN)) {
>         kvm_restore_fcsr(&vcpu->arch.fpu);
>     }
>
> (all except for the "if (signal_pending(current))" and the final "if") 
> is pretty much duplicated with kvm_arch_vcpu_ioctl_run(); the 
> remaining code can also be done from kvm_arch_vcpu_ioctl_run(), the 
> cost is small.  Please move it to a separate function, for example:
>
> int kvm_pre_enter_guest(struct kvm_vcpu *vcpu)
> {
>     if (signal_pending(current)) {
>         run->exit_reason = KVM_EXIT_INTR;
>         ++vcpu->stat.signal_exits;
>         return -EINTR;
>     }
>
>     kvm_acquire_timer(vcpu);
>     _kvm_deliver_intr(vcpu);
>
>     ...
>
>     if (_kvm_guest_has_fpu(&vcpu->arch) &&
>         read_csr_euen() & (CSR_EUEN_FPEN)) {
>         kvm_restore_fcsr(&vcpu->arch.fpu);
>     }
>     return 1;
> }
>
> Call it from _kvm_handle_exit():
>
>     if (ret == RESUME_HOST)
>         return 0;
>
>     r = kvm_pre_enter_guest(vcpu);
>     if (r > 0) {
>         trace_kvm_reenter(vcpu);
>         guest_state_enter_irqoff();
>     }
>
>     return r;
>
> and from kvm_arch_vcpu_ioctl_run():
>
>     local_irq_disable();
>     guest_timing_enter_irqoff();
>     r = kvm_pre_enter_guest(vcpu);
>     if (r > 0) {
>         trace_kvm_enter(vcpu);
>         /*
>          * This should actually not be a function pointer, but
>          * just for clarity
>          */
>         guest_state_enter_irqoff();
>         r = vcpu->arch.vcpu_run(run, vcpu);
>         /* guest_state_exit_irqoff() already done.  */
>         trace_kvm_out(vcpu);
>     }
>     guest_timing_exit_irqoff();
>     local_irq_enable();
>
> out:
>     kvm_sigset_deactivate(vcpu);
>
>     vcpu_put(vcpu);
>     return r;
>
> Paolo

Thanks, I will reorganize this code, add the kvm_pre_enter_guest
function, and apply it in vcpu_handle_exit and vcpu_run.

Thanks
Tianrui Zhao

>
>> +    if (ret == RESUME_GUEST)
>> +        kvm_acquire_timer(vcpu);
>> +
>> +    if (!(ret & RESUME_HOST)) {
>> +        _kvm_deliver_intr(vcpu);
>> +        /* Only check for signals if not already exiting to 
>> userspace */
>> +        if (signal_pending(current)) {
>> +            run->exit_reason = KVM_EXIT_INTR;
>> +            ret = (-EINTR << 2) | RESUME_HOST;
>> +            ++vcpu->stat.signal_exits;
>> +            trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
>> +        }
>> +    }
>> +
>> +    if (ret == RESUME_GUEST) {
>> +        trace_kvm_reenter(vcpu);
>> +
>> +        /*
>> +         * Make sure the read of VCPU requests in vcpu_reenter()
>> +         * callback is not reordered ahead of the write to vcpu->mode,
>> +         * or we could miss a TLB flush request while the requester 
>> sees
>> +         * the VCPU as outside of guest mode and not needing an IPI.
>> +         */
>> +        smp_store_mb(vcpu->mode, IN_GUEST_MODE);
>> +
>> +        cpu = smp_processor_id();
>> +        _kvm_check_requests(vcpu, cpu);
>> +        _kvm_check_vmid(vcpu, cpu);
>> +        vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
>> +
>> +        /*
>> +         * If the FPU is enabled (i.e. the guest's FPU context
>> +         * is live), restore FCSR0.
>> +         */
>> +        if (_kvm_guest_has_fpu(&vcpu->arch) &&
>> +            read_csr_euen() & (CSR_EUEN_FPEN)) {
>> +            kvm_restore_fcsr(&vcpu->arch.fpu);
>> +        }
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>>   int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>>   {
>>       int i;
  

Patch

diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 571ac8b9d..e08a4faa0 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -38,6 +38,92 @@  static int _kvm_check_requests(struct kvm_vcpu *vcpu, int cpu)
 	return ret;
 }
 
+/*
+ * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
+ */
+static int _kvm_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	unsigned long exst = vcpu->arch.host_estat;
+	u32 intr = exst & 0x1fff; /* ignore NMI */
+	u32 exccode = (exst & CSR_ESTAT_EXC) >> CSR_ESTAT_EXC_SHIFT;
+	u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+	int ret = RESUME_GUEST, cpu;
+
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+
+	/* Set a default exit reason */
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+
+	/*
+	 * Set the appropriate status bits based on host CPU features,
+	 * before we hit the scheduler
+	 */
+
+	local_irq_enable();
+
+	kvm_debug("%s: exst: %lx, PC: %p, kvm_run: %p, kvm_vcpu: %p\n",
+			__func__, exst, opc, run, vcpu);
+	trace_kvm_exit(vcpu, exccode);
+	if (exccode) {
+		ret = _kvm_handle_fault(vcpu, exccode);
+	} else {
+		WARN(!intr, "suspicious vm exiting");
+		++vcpu->stat.int_exits;
+
+		if (need_resched())
+			cond_resched();
+
+		ret = RESUME_GUEST;
+	}
+
+	cond_resched();
+
+	local_irq_disable();
+
+	if (ret == RESUME_GUEST)
+		kvm_acquire_timer(vcpu);
+
+	if (!(ret & RESUME_HOST)) {
+		_kvm_deliver_intr(vcpu);
+		/* Only check for signals if not already exiting to userspace */
+		if (signal_pending(current)) {
+			run->exit_reason = KVM_EXIT_INTR;
+			ret = (-EINTR << 2) | RESUME_HOST;
+			++vcpu->stat.signal_exits;
+			trace_kvm_exit(vcpu, KVM_TRACE_EXIT_SIGNAL);
+		}
+	}
+
+	if (ret == RESUME_GUEST) {
+		trace_kvm_reenter(vcpu);
+
+		/*
+		 * Make sure the read of VCPU requests in vcpu_reenter()
+		 * callback is not reordered ahead of the write to vcpu->mode,
+		 * or we could miss a TLB flush request while the requester sees
+		 * the VCPU as outside of guest mode and not needing an IPI.
+		 */
+		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+
+		cpu = smp_processor_id();
+		_kvm_check_requests(vcpu, cpu);
+		_kvm_check_vmid(vcpu, cpu);
+		vcpu->arch.host_eentry = csr_read64(LOONGARCH_CSR_EENTRY);
+
+		/*
+ * If the FPU is enabled (i.e. the guest's FPU context
+		 * is live), restore FCSR0.
+		 */
+		if (_kvm_guest_has_fpu(&vcpu->arch) &&
+			read_csr_euen() & (CSR_EUEN_FPEN)) {
+			kvm_restore_fcsr(&vcpu->arch.fpu);
+		}
+	}
+
+	return ret;
+}
+
 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	int i;