[1/1] selftests: KVM: add test to print boottime wallclock

Message ID: 20231006175715.105517-1-dongli.zhang@oracle.com
State: New
Series: [1/1] selftests: KVM: add test to print boottime wallclock

Commit Message

Dongli Zhang Oct. 6, 2023, 5:57 p.m. UTC
As highlighted by the discussion in [1], the boottime wallclock may drift
because the masterclock (or host monotonic clock) and kvmclock are
calculated with different algorithms in different domains.

Introduce a testcase that prints the boottime wallclock periodically to
help diagnose wallclock drift issues in the future.

The idea is to write the wallclock structure's guest physical address to
MSR_KVM_WALL_CLOCK_NEW and immediately read back the boottime wallclock
in nanoseconds.

References:
[1] https://lore.kernel.org/all/20231001111313.77586-1-nsaenz@amazon.com

Cc: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
---
 tools/testing/selftests/kvm/Makefile          |   3 +-
 .../selftests/kvm/x86_64/boottime_wallclock.c | 100 ++++++++++++++++++
 2 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
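
For context, a minimal sketch of the guest-side protocol the patch uses,
per the KVM pvclock ABI (assuming the selftests headers the patch
includes; the helper name is illustrative, not part of the patch):

#include <asm/kvm_para.h>	/* MSR_KVM_WALL_CLOCK_NEW */
#include <asm/pvclock-abi.h>	/* struct pvclock_wall_clock */

#include "kvm_util.h"
#include "processor.h"		/* wrmsr() */

/*
 * Writing the guest physical address of a struct pvclock_wall_clock to
 * MSR_KVM_WALL_CLOCK_NEW makes KVM fill the structure synchronously with
 * the wallclock time at guest boot, so it can be read back right away.
 */
static uint64_t read_boottime_wallclock(vm_paddr_t wc_pa,
					struct pvclock_wall_clock *wc)
{
	wrmsr(MSR_KVM_WALL_CLOCK_NEW, wc_pa);	/* KVM updates *wc in place */
	return (uint64_t)wc->sec * NSEC_PER_SEC + wc->nsec;
}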
  

Comments

Maxim Levitsky Oct. 10, 2023, 4:13 p.m. UTC | #1
On Fri, 2023-10-06 at 10:57 -0700, Dongli Zhang wrote:
> As inspired by the discussion in [1], the boottime wallclock may drift due
> to the fact that the masterclock (or host monotonic clock) and kvmclock are
> calculated based on the algorithms in different domains.
> 
> This is to introduce a testcase to print the boottime wallclock
> periodically to help diagnose the wallclock drift issue in the future.
> 
> The idea is to wrmsr the MSR_KVM_WALL_CLOCK_NEW, and read the boottime
> wallclock nanoseconds immediately.
> 
> References:
> [1] https://lore.kernel.org/all/20231001111313.77586-1-nsaenz@amazon.com
> 
> Cc: David Woodhouse <dwmw@amazon.co.uk>
> Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
> ---
>  tools/testing/selftests/kvm/Makefile          |   3 +-
>  .../selftests/kvm/x86_64/boottime_wallclock.c | 100 ++++++++++++++++++
>  2 files changed, 102 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
> 
> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> index a3bb36fb3cfc..fea05b0118de 100644
> --- a/tools/testing/selftests/kvm/Makefile
> +++ b/tools/testing/selftests/kvm/Makefile
> @@ -60,7 +60,8 @@ LIBKVM_riscv += lib/riscv/ucall.c
>  TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
>  
>  # Compiled test targets
> -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
> +TEST_GEN_PROGS_x86_64 = x86_64/boottime_wallclock
> +TEST_GEN_PROGS_x86_64 += x86_64/cpuid_test
>  TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
>  TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
>  TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
> diff --git a/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c b/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
> new file mode 100644
> index 000000000000..cc48c9b19920
> --- /dev/null
> +++ b/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
> @@ -0,0 +1,100 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 Oracle and/or its affiliates.
> + */
> +
> +#include <asm/kvm_para.h>
> +#include <asm/pvclock-abi.h>
> +
> +#include "kvm_util.h"
> +#include "processor.h"
> +
> +static int period = 10;
> +
> +#define GUEST_SYNC_WALLCLOCK(__stage, __val)                        \
> +		GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
> +
> +static void guest_main(vm_paddr_t wc_pa, struct pvclock_wall_clock *wc)
> +{
> +	uint64_t wallclock;
> +
> +	while (true) {
> +		wrmsr(MSR_KVM_WALL_CLOCK_NEW, wc_pa);
> +
> +		wallclock = wc->sec * NSEC_PER_SEC + wc->nsec;
> +
> +		GUEST_SYNC_WALLCLOCK(0, wallclock);

Won't this fill the output very fast?
Do you think it will be worth it to wait some time (e.g. 1 second, or at least 1/10 of a second)
between each print?

> +	}
> +}
> +
> +static void handle_sync(struct ucall *uc)
> +{
> +	uint64_t wallclock;
> +
> +	wallclock = uc->args[2];
> +
> +	pr_info("Boottime wallclock value: %"PRIu64" ns\n", wallclock);
> +}
> +
> +static void handle_abort(struct ucall *uc)
> +{
> +	REPORT_GUEST_ASSERT(*uc);
> +}
> +
> +static void enter_guest(struct kvm_vcpu *vcpu)
> +{
> +	struct ucall uc;
> +
> +	while (true) {
> +		vcpu_run(vcpu);
> +
> +		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
> +
> +		switch (get_ucall(vcpu, &uc)) {
> +		case UCALL_SYNC:
> +			handle_sync(&uc);
> +			break;
> +		case UCALL_ABORT:
> +			handle_abort(&uc);
> +			return;
> +		default:
> +			TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd);
> +			return;
> +		}
> +
> +		sleep(period);
> +	}
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +	struct kvm_vcpu *vcpu;
> +	struct kvm_vm *vm;
> +	vm_vaddr_t wc_gva;
> +	vm_paddr_t wc_gpa;
> +	int opt;
> +
> +	while ((opt = getopt(argc, argv, "p:h")) != -1) {
> +		switch (opt) {
> +		case 'p':
> +			period = atoi_positive("The period (seconds)", optarg);
> +			break;
> +		case 'h':
> +		default:
> +			pr_info("usage: %s [-p period (seconds)]\n", argv[0]);
> +			exit(1);
> +		}
> +	}
> +
> +	pr_info("Capture boottime wallclock every %d seconds.\n", period);
> +	pr_info("Stop with Ctrl + c.\n\n");
> +
> +	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
> +
> +	wc_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
> +	wc_gpa = addr_gva2gpa(vm, wc_gva);
> +	vcpu_args_set(vcpu, 2, wc_gpa, wc_gva);
> +
> +	enter_guest(vcpu);
> +	kvm_vm_free(vm);
> +}


Best regards,
	Maxim Levitsky
  
Dongli Zhang Oct. 10, 2023, 4:31 p.m. UTC | #2
Hi Maxim,

On 10/10/23 09:13, Maxim Levitsky wrote:
> On Fri, 2023-10-06 at 10:57 -0700, Dongli Zhang wrote:
>> As inspired by the discussion in [1], the boottime wallclock may drift due
>> to the fact that the masterclock (or host monotonic clock) and kvmclock are
>> calculated based on the algorithms in different domains.
>>
>> This is to introduce a testcase to print the boottime wallclock
>> periodically to help diagnose the wallclock drift issue in the future.
>>
>> The idea is to wrmsr the MSR_KVM_WALL_CLOCK_NEW, and read the boottime
>> wallclock nanoseconds immediately.
>>
>> References:
>> [1] https://lore.kernel.org/all/20231001111313.77586-1-nsaenz@amazon.com
>>
>> Cc: David Woodhouse <dwmw@amazon.co.uk>
>> Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
>> ---
>>  tools/testing/selftests/kvm/Makefile          |   3 +-
>>  .../selftests/kvm/x86_64/boottime_wallclock.c | 100 ++++++++++++++++++
>>  2 files changed, 102 insertions(+), 1 deletion(-)
>>  create mode 100644 tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
>>
>> diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
>> index a3bb36fb3cfc..fea05b0118de 100644
>> --- a/tools/testing/selftests/kvm/Makefile
>> +++ b/tools/testing/selftests/kvm/Makefile
>> @@ -60,7 +60,8 @@ LIBKVM_riscv += lib/riscv/ucall.c
>>  TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
>>  
>>  # Compiled test targets
>> -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
>> +TEST_GEN_PROGS_x86_64 = x86_64/boottime_wallclock
>> +TEST_GEN_PROGS_x86_64 += x86_64/cpuid_test
>>  TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
>>  TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
>>  TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
>> diff --git a/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c b/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
>> new file mode 100644
>> index 000000000000..cc48c9b19920
>> --- /dev/null
>> +++ b/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
>> @@ -0,0 +1,100 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + * Copyright (C) 2023 Oracle and/or its affiliates.
>> + */
>> +
>> +#include <asm/kvm_para.h>
>> +#include <asm/pvclock-abi.h>
>> +
>> +#include "kvm_util.h"
>> +#include "processor.h"
>> +
>> +static int period = 10;
>> +
>> +#define GUEST_SYNC_WALLCLOCK(__stage, __val)                        \
>> +		GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
>> +
>> +static void guest_main(vm_paddr_t wc_pa, struct pvclock_wall_clock *wc)
>> +{
>> +	uint64_t wallclock;
>> +
>> +	while (true) {
>> +		wrmsr(MSR_KVM_WALL_CLOCK_NEW, wc_pa);
>> +
>> +		wallclock = wc->sec * NSEC_PER_SEC + wc->nsec;
>> +
>> +		GUEST_SYNC_WALLCLOCK(0, wallclock);
> 
> Won't this fill the output very fast?
> Do you think it will be worth it to wait some time (e.g 1 second or at least 1/10 of a second)
> between each print?

The wait time is controlled by the VMM side (of the selftest), as shown below.

In the while loop on the VMM side, it sleeps for a period (a configurable
argument) before running into the guest again.

Therefore, the user can decide how frequently the boottime wallclock is printed.

+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	while (true) {
+		vcpu_run(vcpu); -----------> to schedule guest vcpu here
+
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			handle_sync(&uc);
+			break;
+		case UCALL_ABORT:
+			handle_abort(&uc);
+			return;
+		default:
+			TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd);
+			return;
+		}
+
+		sleep(period);  ------------> sleep here
+	}
+}

Thank you very much!

Dongli Zhang

  
Maxim Levitsky Oct. 11, 2023, 6:46 p.m. UTC | #3
On Tue, 2023-10-10 at 09:31 -0700, Dongli Zhang wrote:
> Hi Maxim,
> 
> On 10/10/23 09:13, Maxim Levitsky wrote:
> > On Fri, 2023-10-06 at 10:57 -0700, Dongli Zhang wrote:
> > > As inspired by the discussion in [1], the boottime wallclock may drift due
> > > to the fact that the masterclock (or host monotonic clock) and kvmclock are
> > > calculated based on the algorithms in different domains.
> > > 
> > > This is to introduce a testcase to print the boottime wallclock
> > > periodically to help diagnose the wallclock drift issue in the future.
> > > 
> > > The idea is to wrmsr the MSR_KVM_WALL_CLOCK_NEW, and read the boottime
> > > wallclock nanoseconds immediately.
> > > 
> > > References:
> > > [1] https://lore.kernel.org/all/20231001111313.77586-1-nsaenz@amazon.com
> > > 
> > > Cc: David Woodhouse <dwmw@amazon.co.uk>
> > > Signed-off-by: Dongli Zhang <dongli.zhang@oracle.com>
> > > ---
> > >  tools/testing/selftests/kvm/Makefile          |   3 +-
> > >  .../selftests/kvm/x86_64/boottime_wallclock.c | 100 ++++++++++++++++++
> > >  2 files changed, 102 insertions(+), 1 deletion(-)
> > >  create mode 100644 tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
> > > 
> > > diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
> > > index a3bb36fb3cfc..fea05b0118de 100644
> > > --- a/tools/testing/selftests/kvm/Makefile
> > > +++ b/tools/testing/selftests/kvm/Makefile
> > > @@ -60,7 +60,8 @@ LIBKVM_riscv += lib/riscv/ucall.c
> > >  TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
> > >  
> > >  # Compiled test targets
> > > -TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
> > > +TEST_GEN_PROGS_x86_64 = x86_64/boottime_wallclock
> > > +TEST_GEN_PROGS_x86_64 += x86_64/cpuid_test
> > >  TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
> > >  TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
> > >  TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
> > > diff --git a/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c b/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
> > > new file mode 100644
> > > index 000000000000..cc48c9b19920
> > > --- /dev/null
> > > +++ b/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
> > > @@ -0,0 +1,100 @@
> > > +// SPDX-License-Identifier: GPL-2.0-only
> > > +/*
> > > + * Copyright (C) 2023 Oracle and/or its affiliates.
> > > + */
> > > +
> > > +#include <asm/kvm_para.h>
> > > +#include <asm/pvclock-abi.h>
> > > +
> > > +#include "kvm_util.h"
> > > +#include "processor.h"
> > > +
> > > +static int period = 10;
> > > +
> > > +#define GUEST_SYNC_WALLCLOCK(__stage, __val)                        \
> > > +		GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
> > > +
> > > +static void guest_main(vm_paddr_t wc_pa, struct pvclock_wall_clock *wc)
> > > +{
> > > +	uint64_t wallclock;
> > > +
> > > +	while (true) {
> > > +		wrmsr(MSR_KVM_WALL_CLOCK_NEW, wc_pa);
> > > +
> > > +		wallclock = wc->sec * NSEC_PER_SEC + wc->nsec;
> > > +
> > > +		GUEST_SYNC_WALLCLOCK(0, wallclock);
> > 
> > Won't this fill the output very fast?
> > Do you think it will be worth it to wait some time (e.g 1 second or at least 1/10 of a second)
> > between each print?
> 
> The wait time is controlled by the VMM side (of selftest) as in below.
> 
> In the while loop at VMM side, it sleeps for a period (configurable argument),
> until it runs into the guest again.
> 
> Therefore, the user can decide the frequency to print the boottime wallclock.


Sorry about this, I hadn't noticed this code!


Best regards,
	Maxim Levitsky
  
Sean Christopherson Oct. 18, 2023, 7:51 p.m. UTC | #4
On Fri, Oct 06, 2023, Dongli Zhang wrote:
> As inspired by the discussion in [1], the boottime wallclock may drift due
> to the fact that the masterclock (or host monotonic clock) and kvmclock are
> calculated based on the algorithms in different domains.
> 
> This is to introduce a testcase to print the boottime wallclock
> periodically to help diagnose the wallclock drift issue in the future.
> 
> The idea is to wrmsr the MSR_KVM_WALL_CLOCK_NEW, and read the boottime
> wallclock nanoseconds immediately.

This doesn't actually test anything of interest though.  IIUC, it requires a human
looking at the output for it to provide any value.  And it requires a manual
cancelation, which makes it even less suitable for selftests.

I like the idea, e.g. I bet there are more utilities that could be written that
utilize the selftests infrastructure, just not sure what to do with this (assuming
it can't be massaged into an actual test).
  
Andrew Jones Oct. 19, 2023, 6:51 a.m. UTC | #5
On Wed, Oct 18, 2023 at 12:51:55PM -0700, Sean Christopherson wrote:
> On Fri, Oct 06, 2023, Dongli Zhang wrote:
> > As inspired by the discussion in [1], the boottime wallclock may drift due
> > to the fact that the masterclock (or host monotonic clock) and kvmclock are
> > calculated based on the algorithms in different domains.
> > 
> > This is to introduce a testcase to print the boottime wallclock
> > periodically to help diagnose the wallclock drift issue in the future.
> > 
> > The idea is to wrmsr the MSR_KVM_WALL_CLOCK_NEW, and read the boottime
> > wallclock nanoseconds immediately.
> 
> This doesn't actually test anything of interest though.  IIUC, it requires a human
> looking at the output for it to provide any value.  And it requires a manual
> cancelation, which makes it even less suitable for selftests.
> 
> I like the idea, e.g. I bet there are more utilities that could be written that
> utilize the selftests infrastructure, just not sure what to do with this (assuming
> it can't be massaged into an actual test).

Yes, there's definitely code overlap between selftests and [debug/test]
utilities. For example, I snuck a utility into [1]. For that one, without
any command line parameters it runs as a typical test. Given command line
input, it behaves as a utility (which developers may use for additional
platform-specific testing). It seems like we need a way to build and
organize these types of things separately, i.e. a utility should probably
be in tools/$DIR not tools/testing/selftests/$DIR. For [1], I don't have
much of an excuse for not just splitting the two functionalities into two
files, but, for KVM selftests, we'd need to find a way to share the
framework.

[1] https://lore.kernel.org/all/20231011135610.122850-14-ajones@ventanamicro.com/

Thanks,
drew
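
A minimal sketch of the dual-mode pattern described above; both helpers
are hypothetical placeholders, not existing selftests APIs:

/*
 * With no arguments the binary runs as a bounded, self-checking test;
 * with command line input it behaves as a developer utility.
 */
int main(int argc, char *argv[])
{
	if (argc == 1)
		return run_as_test();		/* fixed bound, asserts on failure */

	return run_as_utility(argc, argv);	/* user-driven, e.g. -p <period> */
}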
  
Dongli Zhang Oct. 20, 2023, 2:39 p.m. UTC | #6
Hi Sean and Andrew,

On 10/18/23 23:51, Andrew Jones wrote:
> On Wed, Oct 18, 2023 at 12:51:55PM -0700, Sean Christopherson wrote:
>> On Fri, Oct 06, 2023, Dongli Zhang wrote:
>>> As inspired by the discussion in [1], the boottime wallclock may drift due
>>> to the fact that the masterclock (or host monotonic clock) and kvmclock are
>>> calculated based on the algorithms in different domains.
>>>
>>> This is to introduce a testcase to print the boottime wallclock
>>> periodically to help diagnose the wallclock drift issue in the future.
>>>
>>> The idea is to wrmsr the MSR_KVM_WALL_CLOCK_NEW, and read the boottime
>>> wallclock nanoseconds immediately.
>>
>> This doesn't actually test anything of interest though.  IIUC, it requires a human
>> looking at the output for it to provide any value.  And it requires a manual
>> cancelation, which makes it even less suitable for selftests.
>>
>> I like the idea, e.g. I bet there are more utilities that could be written that
>> utilize the selftests infrastructure, just not sure what to do with this (assuming
>> it can't be massaged into an actual test).

Thank you very much for the suggestion.

Would it work if I turn it into a test along these lines (see the sketch
after this list):

1. Capture boottime_wallclock_01.
2. Wait for 10 seconds by default (configurable, e.g., max 60 seconds).
3. Capture boottime_wallclock_02.
4. Report an error if the two values drifted.
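
A minimal sketch of this flow, assuming a hypothetical
get_boottime_wallclock() helper that returns the guest's boottime
wallclock in nanoseconds, and a tolerance chosen by the test; the
boottime wallclock should stay constant, so any delta between the two
captures is drift:

static void test_boottime_wallclock_drift(struct kvm_vcpu *vcpu,
					  unsigned int wait_secs,
					  uint64_t tolerance_ns)
{
	uint64_t wc1, wc2, delta;

	wc1 = get_boottime_wallclock(vcpu);	/* hypothetical helper */
	sleep(wait_secs);
	wc2 = get_boottime_wallclock(vcpu);

	/* Absolute drift between the two captures. */
	delta = wc2 > wc1 ? wc2 - wc1 : wc1 - wc2;
	TEST_ASSERT(delta <= tolerance_ns,
		    "boottime wallclock drifted by %" PRIu64 " ns", delta);
}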


I have another pvclock vCPU hotplug test with the same flow.

Thank you very much!

Dongli Zhang

  
David Woodhouse Oct. 20, 2023, 3:21 p.m. UTC | #7
On Fri, 2023-10-20 at 07:39 -0700, Dongli Zhang wrote:
> 
> 1. Capture boottime_wallclock_01.
> 2. Wait for 10-second by default (configurable, e.g., max 60-second)
> 3. Capture boottime_wallclock_02.
> 4. Report error if drift.
> 
> 
> I have another pvclock vCPU hotplug test with the same flow.

Wallclock, or CLOCK_TAI? You're going to get the odd one-second jump
with the former.
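
For background: a leap second steps CLOCK_REALTIME, and thus the
UTC-based wallclock, by one second, while CLOCK_TAI counts straight
through it. A small host-side check of the kernel's TAI-UTC offset,
which is non-zero only if the offset has been set (e.g. by NTP):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec tai, real;

	clock_gettime(CLOCK_TAI, &tai);
	clock_gettime(CLOCK_REALTIME, &real);

	/* CLOCK_TAI = CLOCK_REALTIME + the kernel's TAI offset. */
	printf("TAI-UTC offset: %ld s\n", (long)(tai.tv_sec - real.tv_sec));
	return 0;
}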
  
Sean Christopherson Oct. 20, 2023, 3:22 p.m. UTC | #8
On Fri, Oct 20, 2023, Dongli Zhang wrote:
> Hi Sean and Andrew,
> 
> On 10/18/23 23:51, Andrew Jones wrote:
> > On Wed, Oct 18, 2023 at 12:51:55PM -0700, Sean Christopherson wrote:
> >> On Fri, Oct 06, 2023, Dongli Zhang wrote:
> >>> As inspired by the discussion in [1], the boottime wallclock may drift due
> >>> to the fact that the masterclock (or host monotonic clock) and kvmclock are
> >>> calculated based on the algorithms in different domains.
> >>>
> >>> This is to introduce a testcase to print the boottime wallclock
> >>> periodically to help diagnose the wallclock drift issue in the future.
> >>>
> >>> The idea is to wrmsr the MSR_KVM_WALL_CLOCK_NEW, and read the boottime
> >>> wallclock nanoseconds immediately.
> >>
> >> This doesn't actually test anything of interest though.  IIUC, it requires a human
> >> looking at the output for it to provide any value.  And it requires a manual
> >> cancelation, which makes it even less suitable for selftests.
> >>
> >> I like the idea, e.g. I bet there are more utilities that could be written that
> >> utilize the selftests infrastructure, just not sure what to do with this (assuming
> >> it can't be massaged into an actual test).
> 
> Thank you very much for the suggestion.
> 
> Would that work if I turn it into a test:
> 
> 1. Capture boottime_wallclock_01.
> 2. Wait for 10-second by default (configurable, e.g., max 60-second)
> 3. Capture boottime_wallclock_02.
> 4. Report error if drift.

Rather than pick an arbitrary time of 10 seconds, deliberately introduce a
plausible bug in KVM (or re-introduce a previous bug) and see how low you can
push the wait time while still reliably detecting the unwanted drift.  Then add
a reasonable buffer to give the test some margin for error.  Given the drift that
David reported with the xen_shinfo test, I assume/hope that a 10 second runtime
would be overkill.

I would also differentiate between total runtime and the periodic check time,
e.g. to allow checking for drift every N (milli)seconds while having a total
runtime of M seconds.  Then there's no need to set an upper bound, e.g. the user
could set the test to run in the background for multiple hours without having to
worry about the test being useless if it's canceled early.
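
A sketch of that split, reusing the hypothetical
get_boottime_wallclock() helper from earlier in the thread: drift is
checked every check_ms milliseconds for up to runtime_secs total, so
each completed check stands on its own even if the run is canceled
early:

static void run_drift_checks(struct kvm_vcpu *vcpu, unsigned int check_ms,
			     unsigned int runtime_secs, uint64_t tolerance_ns)
{
	uint64_t base = get_boottime_wallclock(vcpu);
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		usleep(check_ms * 1000);

		uint64_t cur = get_boottime_wallclock(vcpu);
		uint64_t delta = cur > base ? cur - base : base - cur;

		TEST_ASSERT(delta <= tolerance_ns,
			    "boottime wallclock drifted by %" PRIu64 " ns",
			    delta);

		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (now.tv_sec - start.tv_sec < runtime_secs);
}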
  

Patch

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a3bb36fb3cfc..fea05b0118de 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -60,7 +60,8 @@  LIBKVM_riscv += lib/riscv/ucall.c
 TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
 
 # Compiled test targets
-TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
+TEST_GEN_PROGS_x86_64 = x86_64/boottime_wallclock
+TEST_GEN_PROGS_x86_64 += x86_64/cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
diff --git a/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c b/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
new file mode 100644
index 000000000000..cc48c9b19920
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/boottime_wallclock.c
@@ -0,0 +1,100 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Oracle and/or its affiliates.
+ */
+
+#include <asm/kvm_para.h>
+#include <asm/pvclock-abi.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+static int period = 10;
+
+#define GUEST_SYNC_WALLCLOCK(__stage, __val)                        \
+		GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(vm_paddr_t wc_pa, struct pvclock_wall_clock *wc)
+{
+	uint64_t wallclock;
+
+	while (true) {
+		wrmsr(MSR_KVM_WALL_CLOCK_NEW, wc_pa);
+
+		wallclock = wc->sec * NSEC_PER_SEC + wc->nsec;
+
+		GUEST_SYNC_WALLCLOCK(0, wallclock);
+	}
+}
+
+static void handle_sync(struct ucall *uc)
+{
+	uint64_t wallclock;
+
+	wallclock = uc->args[2];
+
+	pr_info("Boottime wallclock value: %"PRIu64" ns\n", wallclock);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+	REPORT_GUEST_ASSERT(*uc);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+	struct ucall uc;
+
+	while (true) {
+		vcpu_run(vcpu);
+
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			handle_sync(&uc);
+			break;
+		case UCALL_ABORT:
+			handle_abort(&uc);
+			return;
+		default:
+			TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd);
+			return;
+		}
+
+		sleep(period);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+	vm_vaddr_t wc_gva;
+	vm_paddr_t wc_gpa;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "p:h")) != -1) {
+		switch (opt) {
+		case 'p':
+			period = atoi_positive("The period (seconds)", optarg);
+			break;
+		case 'h':
+		default:
+			pr_info("usage: %s [-p period (seconds)]\n", argv[0]);
+			exit(1);
+		}
+	}
+
+	pr_info("Capture boottime wallclock every %d seconds.\n", period);
+	pr_info("Stop with Ctrl + c.\n\n");
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+	wc_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+	wc_gpa = addr_gva2gpa(vm, wc_gva);
+	vcpu_args_set(vcpu, 2, wc_gpa, wc_gva);
+
+	enter_guest(vcpu);
+	kvm_vm_free(vm);
+}