[4/4] drivers/clocksource/hyper-v: Add TSC page support for root partition
Commit Message
From: Stanislav Kinsburskiy <stanislav.kinsburskiy@gmail.com>
In a Hyper-V root partition, the guest has to map the page specified by the
hypervisor (instead of providing the page to the hypervisor, as is done in
guest partitions).
However, it is too early to map the page when the clock is initialized, so the
actual mapping happens later.
Signed-off-by: Stanislav Kinsburskiy <stanislav.kinsburskiy@gmail.com>
CC: "K. Y. Srinivasan" <kys@microsoft.com>
CC: Haiyang Zhang <haiyangz@microsoft.com>
CC: Wei Liu <wei.liu@kernel.org>
CC: Dexuan Cui <decui@microsoft.com>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: Borislav Petkov <bp@alien8.de>
CC: Dave Hansen <dave.hansen@linux.intel.com>
CC: x86@kernel.org
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: Daniel Lezcano <daniel.lezcano@linaro.org>
CC: linux-hyperv@vger.kernel.org
CC: linux-kernel@vger.kernel.org
---
arch/x86/hyperv/hv_init.c | 2 ++
drivers/clocksource/hyperv_timer.c | 34 +++++++++++++++++++++++++---------
include/clocksource/hyperv_timer.h | 1 +
3 files changed, 28 insertions(+), 9 deletions(-)
Comments
On Tue, Nov 01, 2022 at 05:31:20PM +0000, Stanislav Kinsburskii wrote:
> From: Stanislav Kinsburskiy <stanislav.kinsburskiy@gmail.com>
>
> It hyper-v root partition guest has to map the page, specified by the
> hypervisor (instead of providing the page to the hypervisor like it's done in
> the guest partitions).
> However, it's too early to map the page when the clock is initialized, so, the
> actual mapping is happening later.
>
> Signed-off-by: Stanislav Kinsburskiy <stanislav.kinsburskiy@gmail.com>
> CC: "K. Y. Srinivasan" <kys@microsoft.com>
> CC: Haiyang Zhang <haiyangz@microsoft.com>
> CC: Wei Liu <wei.liu@kernel.org>
> CC: Dexuan Cui <decui@microsoft.com>
> CC: Thomas Gleixner <tglx@linutronix.de>
> CC: Ingo Molnar <mingo@redhat.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Dave Hansen <dave.hansen@linux.intel.com>
> CC: x86@kernel.org
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Daniel Lezcano <daniel.lezcano@linaro.org>
> CC: linux-hyperv@vger.kernel.org
> CC: linux-kernel@vger.kernel.org
> ---
> arch/x86/hyperv/hv_init.c | 2 ++
> drivers/clocksource/hyperv_timer.c | 34 +++++++++++++++++++++++++---------
> include/clocksource/hyperv_timer.h | 1 +
> 3 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index f49bc3ec76e6..89954490af93 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -464,6 +464,8 @@ void __init hyperv_init(void)
> BUG_ON(!src);
> memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE);
> memunmap(src);
> +
> + hv_remap_tsc_clocksource();
> } else {
> hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
> wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
> index 635c14c1e3bf..4118e4bc9194 100644
> --- a/drivers/clocksource/hyperv_timer.c
> +++ b/drivers/clocksource/hyperv_timer.c
> @@ -508,9 +508,6 @@ static bool __init hv_init_tsc_clocksource(void)
> if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
> return false;
>
> - if (hv_root_partition)
> - return false;
> -
> /*
> * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly
> * handles frequency and offset changes due to live migration,
> @@ -528,16 +525,22 @@ static bool __init hv_init_tsc_clocksource(void)
> }
>
> hv_read_reference_counter = read_hv_clock_tsc;
> - tsc_pfn = __phys_to_pfn(virt_to_phys(tsc_page));
>
> /*
> - * The Hyper-V TLFS specifies to preserve the value of reserved
> - * bits in registers. So read the existing value, preserve the
> - * low order 12 bits, and add in the guest physical address
> - * (which already has at least the low 12 bits set to zero since
> - * it is page aligned). Also set the "enable" bit, which is bit 0.
> + * TSC page mapping works differently in root and guest partitions.
> + * - In guest partition the guest PFN has to be passed to the
> + * hypervisor.
> + * - In root partition it's other way around: the guest has to map the
> + * PFN, provided by the hypervisor.
> + * But it can't be mapped right here as it's too early and MMU isn't
> + * ready yet. So, we only set the enable bit here and will remap the
> + * page later in hv_remap_tsc_clocksource().
> */
> tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC);
> + if (hv_root_partition)
> + tsc_pfn = tsc_msr.pfn;
Why store the PFN like this? While mapping the page it can be read from the
MSR. Once the tsc page is mapped it can be obtained by
__phys_to_pfn(virt_to_phys(tsc_page)).
> + else
> + tsc_pfn = __phys_to_pfn(virt_to_phys(tsc_page));
> tsc_msr.enable = 1;
> tsc_msr.pfn = tsc_pfn;
> hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64);
> @@ -572,3 +575,16 @@ void __init hv_init_clocksource(void)
> hv_sched_clock_offset = hv_read_reference_counter();
> hv_setup_sched_clock(read_hv_sched_clock_msr);
> }
> +
> +void __init hv_remap_tsc_clocksource(void)
> +{
> + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
> + return;
> +
> + if (!hv_root_partition)
Perhaps this should WARN()?
Thanks,
Anirudh.
> + return;
> +
> + tsc_page = memremap(__pfn_to_phys(tsc_pfn), PAGE_SIZE, MEMREMAP_WB);
> + if (!tsc_page)
> + pr_err("Failed to remap Hyper-V TSC page.\n");
> +}
> diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
> index 3078d23faaea..783701a2102d 100644
> --- a/include/clocksource/hyperv_timer.h
> +++ b/include/clocksource/hyperv_timer.h
> @@ -31,6 +31,7 @@ extern void hv_stimer_global_cleanup(void);
> extern void hv_stimer0_isr(void);
>
> extern void hv_init_clocksource(void);
> +extern void hv_remap_tsc_clocksource(void);
>
> extern unsigned long hv_get_tsc_pfn(void);
> extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
>
From: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com> Sent: Tuesday, November 1, 2022 10:31 AM
>
> It hyper-v root partition guest has to map the page, specified by the
> hypervisor (instead of providing the page to the hypervisor like it's done in
> the guest partitions).
> However, it's too early to map the page when the clock is initialized, so, the
> actual mapping is happening later.
>
> Signed-off-by: Stanislav Kinsburskiy <stanislav.kinsburskiy@gmail.com>
> CC: "K. Y. Srinivasan" <kys@microsoft.com>
> CC: Haiyang Zhang <haiyangz@microsoft.com>
> CC: Wei Liu <wei.liu@kernel.org>
> CC: Dexuan Cui <decui@microsoft.com>
> CC: Thomas Gleixner <tglx@linutronix.de>
> CC: Ingo Molnar <mingo@redhat.com>
> CC: Borislav Petkov <bp@alien8.de>
> CC: Dave Hansen <dave.hansen@linux.intel.com>
> CC: x86@kernel.org
> CC: "H. Peter Anvin" <hpa@zytor.com>
> CC: Daniel Lezcano <daniel.lezcano@linaro.org>
> CC: linux-hyperv@vger.kernel.org
> CC: linux-kernel@vger.kernel.org
> ---
> arch/x86/hyperv/hv_init.c | 2 ++
> drivers/clocksource/hyperv_timer.c | 34 +++++++++++++++++++++++++---------
> include/clocksource/hyperv_timer.h | 1 +
> 3 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index f49bc3ec76e6..89954490af93 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -464,6 +464,8 @@ void __init hyperv_init(void)
> BUG_ON(!src);
> memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE);
> memunmap(src);
> +
> + hv_remap_tsc_clocksource();
> } else {
> hypercall_msr.guest_physical_address =
> vmalloc_to_pfn(hv_hypercall_pg);
> wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
> index 635c14c1e3bf..4118e4bc9194 100644
> --- a/drivers/clocksource/hyperv_timer.c
> +++ b/drivers/clocksource/hyperv_timer.c
> @@ -508,9 +508,6 @@ static bool __init hv_init_tsc_clocksource(void)
> if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
> return false;
>
> - if (hv_root_partition)
> - return false;
> -
> /*
> * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly
> * handles frequency and offset changes due to live migration,
> @@ -528,16 +525,22 @@ static bool __init hv_init_tsc_clocksource(void)
> }
>
> hv_read_reference_counter = read_hv_clock_tsc;
> - tsc_pfn = __phys_to_pfn(virt_to_phys(tsc_page));
>
> /*
> - * The Hyper-V TLFS specifies to preserve the value of reserved
> - * bits in registers. So read the existing value, preserve the
> - * low order 12 bits, and add in the guest physical address
> - * (which already has at least the low 12 bits set to zero since
> - * it is page aligned). Also set the "enable" bit, which is bit 0.
> + * TSC page mapping works differently in root and guest partitions.
> + * - In guest partition the guest PFN has to be passed to the
> + * hypervisor.
> + * - In root partition it's other way around: the guest has to map the
> + * PFN, provided by the hypervisor.
> + * But it can't be mapped right here as it's too early and MMU isn't
> + * ready yet. So, we only set the enable bit here and will remap the
> + * page later in hv_remap_tsc_clocksource().
> */
> tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC);
> + if (hv_root_partition)
> + tsc_pfn = tsc_msr.pfn;
> + else
> + tsc_pfn = __phys_to_pfn(virt_to_phys(tsc_page));
> tsc_msr.enable = 1;
> tsc_msr.pfn = tsc_pfn;
> hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64);
> @@ -572,3 +575,16 @@ void __init hv_init_clocksource(void)
> hv_sched_clock_offset = hv_read_reference_counter();
> hv_setup_sched_clock(read_hv_sched_clock_msr);
> }
> +
> +void __init hv_remap_tsc_clocksource(void)
> +{
> + if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
> + return;
> +
> + if (!hv_root_partition)
> + return;
> +
> + tsc_page = memremap(__pfn_to_phys(tsc_pfn), PAGE_SIZE, MEMREMAP_WB);
Instead of hard-coding PAGE_SIZE here, could this be sizeof(union tsc_pg)?
In the past we sorted out how to make the memory allocated for the TSC page be
a full guest page (not Microsoft Hypervisor page, which could be different) so that
it can be mapped into user space for vDSO. So it seems appropriate to piggyback
on that union definition rather than hardcoding PAGE_SIZE.
> + if (!tsc_page)
> + pr_err("Failed to remap Hyper-V TSC page.\n");
> +}
> diff --git a/include/clocksource/hyperv_timer.h
> b/include/clocksource/hyperv_timer.h
> index 3078d23faaea..783701a2102d 100644
> --- a/include/clocksource/hyperv_timer.h
> +++ b/include/clocksource/hyperv_timer.h
> @@ -31,6 +31,7 @@ extern void hv_stimer_global_cleanup(void);
> extern void hv_stimer0_isr(void);
>
> extern void hv_init_clocksource(void);
> +extern void hv_remap_tsc_clocksource(void);
>
> extern unsigned long hv_get_tsc_pfn(void);
> extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
>
@@ -464,6 +464,8 @@ void __init hyperv_init(void)
BUG_ON(!src);
memcpy_to_page(pg, 0, src, HV_HYP_PAGE_SIZE);
memunmap(src);
+
+ hv_remap_tsc_clocksource();
} else {
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
@@ -508,9 +508,6 @@ static bool __init hv_init_tsc_clocksource(void)
if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
return false;
- if (hv_root_partition)
- return false;
-
/*
* If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly
* handles frequency and offset changes due to live migration,
@@ -528,16 +525,22 @@ static bool __init hv_init_tsc_clocksource(void)
}
hv_read_reference_counter = read_hv_clock_tsc;
- tsc_pfn = __phys_to_pfn(virt_to_phys(tsc_page));
/*
- * The Hyper-V TLFS specifies to preserve the value of reserved
- * bits in registers. So read the existing value, preserve the
- * low order 12 bits, and add in the guest physical address
- * (which already has at least the low 12 bits set to zero since
- * it is page aligned). Also set the "enable" bit, which is bit 0.
+ * TSC page mapping works differently in root and guest partitions.
+ * - In guest partition the guest PFN has to be passed to the
+ * hypervisor.
+ * - In root partition it's other way around: the guest has to map the
+ * PFN, provided by the hypervisor.
+ * But it can't be mapped right here as it's too early and MMU isn't
+ * ready yet. So, we only set the enable bit here and will remap the
+ * page later in hv_remap_tsc_clocksource().
*/
tsc_msr.as_uint64 = hv_get_register(HV_REGISTER_REFERENCE_TSC);
+ if (hv_root_partition)
+ tsc_pfn = tsc_msr.pfn;
+ else
+ tsc_pfn = __phys_to_pfn(virt_to_phys(tsc_page));
tsc_msr.enable = 1;
tsc_msr.pfn = tsc_pfn;
hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr.as_uint64);
@@ -572,3 +575,16 @@ void __init hv_init_clocksource(void)
hv_sched_clock_offset = hv_read_reference_counter();
hv_setup_sched_clock(read_hv_sched_clock_msr);
}
+
+void __init hv_remap_tsc_clocksource(void)
+{
+ if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
+ return;
+
+ if (!hv_root_partition)
+ return;
+
+ tsc_page = memremap(__pfn_to_phys(tsc_pfn), PAGE_SIZE, MEMREMAP_WB);
+ if (!tsc_page)
+ pr_err("Failed to remap Hyper-V TSC page.\n");
+}
@@ -31,6 +31,7 @@ extern void hv_stimer_global_cleanup(void);
extern void hv_stimer0_isr(void);
extern void hv_init_clocksource(void);
+extern void hv_remap_tsc_clocksource(void);
extern unsigned long hv_get_tsc_pfn(void);
extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void);