[v4] KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT
Commit Message
From: Paul Durrant <pdurrant@amazon.com>
Unless explicitly told to do so (by passing 'clocksource=tsc' and
'tsc=stable:socket', and then jumping through some hoops concerning
potential CPU hotplug) Xen will never use TSC as its clocksource.
Hence, by default, a Xen guest will not see PVCLOCK_TSC_STABLE_BIT set
in either the primary or secondary pvclock memory areas. This has
led to bugs in some guest kernels which only become evident if
PVCLOCK_TSC_STABLE_BIT *is* set in the pvclocks. Hence, to support
such guests, give the VMM a new Xen HVM config flag to tell KVM to
forcibly clear the bit in the Xen pvclocks.
Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
v4:
- Re-base.
- Re-work 'update_pvclock' test as requested.
v3:
- Moved clearing of PVCLOCK_TSC_STABLE_BIT the right side of the
memcpy().
- Added an all-vCPUs KVM_REQ_CLOCK_UPDATE when the HVM config
flag is changed.
---
Documentation/virt/kvm/api.rst | 6 ++++++
arch/x86/kvm/x86.c | 27 ++++++++++++++++++++++-----
arch/x86/kvm/xen.c | 9 ++++++++-
include/uapi/linux/kvm.h | 1 +
4 files changed, 37 insertions(+), 6 deletions(-)
base-commit: 35dcbd9e47035f98f3910ae420bf10892c9bdc99
Comments
Hi Paul,
kernel test robot noticed the following build warnings:
[auto build test WARNING on 35dcbd9e47035f98f3910ae420bf10892c9bdc99]
url: https://github.com/intel-lab-lkp/linux/commits/Paul-Durrant/KVM-x86-xen-add-an-override-for-PVCLOCK_TSC_STABLE_BIT/20231102-034122
base: 35dcbd9e47035f98f3910ae420bf10892c9bdc99
patch link: https://lore.kernel.org/r/20231101183032.1498211-1-paul%40xen.org
patch subject: [PATCH v4] KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT
config: i386-randconfig-013-20231102 (https://download.01.org/0day-ci/archive/20231102/202311021159.ppYESBYx-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231102/202311021159.ppYESBYx-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311021159.ppYESBYx-lkp@intel.com/
All warnings (new ones prefixed by >>):
arch/x86/kvm/x86.c: In function 'kvm_guest_time_update':
>> arch/x86/kvm/x86.c:3176:14: warning: unused variable 'xen_pvclock_tsc_unstable' [-Wunused-variable]
3176 | bool xen_pvclock_tsc_unstable =
| ^~~~~~~~~~~~~~~~~~~~~~~~
vim +/xen_pvclock_tsc_unstable +3176 arch/x86/kvm/x86.c
3158
3159 static int kvm_guest_time_update(struct kvm_vcpu *v)
3160 {
3161 unsigned long flags, tgt_tsc_khz;
3162 unsigned seq;
3163 struct kvm_vcpu_arch *vcpu = &v->arch;
3164 struct kvm_arch *ka = &v->kvm->arch;
3165 s64 kernel_ns;
3166 u64 tsc_timestamp, host_tsc;
3167 u8 pvclock_flags;
3168 bool use_master_clock;
3169
3170 /*
3171 * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless
3172 * explicitly told to use TSC as its clocksource Xen will not set this bit.
3173 * This default behaviour led to bugs in some guest kernels which cause
3174 * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
3175 */
> 3176 bool xen_pvclock_tsc_unstable =
3177 ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
3178
3179 kernel_ns = 0;
3180 host_tsc = 0;
3181
3182 /*
3183 * If the host uses TSC clock, then passthrough TSC as stable
3184 * to the guest.
3185 */
3186 do {
3187 seq = read_seqcount_begin(&ka->pvclock_sc);
3188 use_master_clock = ka->use_master_clock;
3189 if (use_master_clock) {
3190 host_tsc = ka->master_cycle_now;
3191 kernel_ns = ka->master_kernel_ns;
3192 }
3193 } while (read_seqcount_retry(&ka->pvclock_sc, seq));
3194
3195 /* Keep irq disabled to prevent changes to the clock */
3196 local_irq_save(flags);
3197 tgt_tsc_khz = get_cpu_tsc_khz();
3198 if (unlikely(tgt_tsc_khz == 0)) {
3199 local_irq_restore(flags);
3200 kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
3201 return 1;
3202 }
3203 if (!use_master_clock) {
3204 host_tsc = rdtsc();
3205 kernel_ns = get_kvmclock_base_ns();
3206 }
3207
3208 tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
3209
3210 /*
3211 * We may have to catch up the TSC to match elapsed wall clock
3212 * time for two reasons, even if kvmclock is used.
3213 * 1) CPU could have been running below the maximum TSC rate
3214 * 2) Broken TSC compensation resets the base at each VCPU
3215 * entry to avoid unknown leaps of TSC even when running
3216 * again on the same CPU. This may cause apparent elapsed
3217 * time to disappear, and the guest to stand still or run
3218 * very slowly.
3219 */
3220 if (vcpu->tsc_catchup) {
3221 u64 tsc = compute_guest_tsc(v, kernel_ns);
3222 if (tsc > tsc_timestamp) {
3223 adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
3224 tsc_timestamp = tsc;
3225 }
3226 }
3227
3228 local_irq_restore(flags);
3229
3230 /* With all the info we got, fill in the values */
3231
3232 if (kvm_caps.has_tsc_control)
3233 tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
3234 v->arch.l1_tsc_scaling_ratio);
3235
3236 if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
3237 kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
3238 &vcpu->hv_clock.tsc_shift,
3239 &vcpu->hv_clock.tsc_to_system_mul);
3240 vcpu->hw_tsc_khz = tgt_tsc_khz;
3241 kvm_xen_update_tsc_info(v);
3242 }
3243
3244 vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
3245 vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
3246 vcpu->last_guest_tsc = tsc_timestamp;
3247
3248 /* If the host uses TSC clocksource, then it is stable */
3249 pvclock_flags = 0;
3250 if (use_master_clock)
3251 pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
3252
3253 vcpu->hv_clock.flags = pvclock_flags;
3254
3255 if (vcpu->pv_time.active)
3256 kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
3257 #ifdef CONFIG_KVM_XEN
3258 if (vcpu->xen.vcpu_info_cache.active)
3259 kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
3260 offsetof(struct compat_vcpu_info, time),
3261 xen_pvclock_tsc_unstable);
3262 if (vcpu->xen.vcpu_time_info_cache.active)
3263 kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
3264 xen_pvclock_tsc_unstable);
3265 #endif
3266 kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
3267 return 0;
3268 }
3269
@@ -8270,6 +8270,7 @@ PVHVM guests. Valid flags are::
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
+ #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@@ -8313,6 +8314,11 @@ behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
specifically enabled (by the guest making the hypercall, causing the VMM
to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).
+The KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag indicates that KVM supports
+clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be
+done when the KVM_CAP_XEN_HVM ioctl sets the
+KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag.
+
8.31 KVM_CAP_PPC_MULTITCE
-------------------------
@@ -3104,7 +3104,8 @@ u64 get_kvmclock_ns(struct kvm *kvm)
static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
struct gfn_to_pfn_cache *gpc,
- unsigned int offset)
+ unsigned int offset,
+ bool force_tsc_unstable)
{
struct kvm_vcpu_arch *vcpu = &v->arch;
struct pvclock_vcpu_time_info *guest_hv_clock;
@@ -3141,6 +3142,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
}
memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
+
+ if (force_tsc_unstable)
+ guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT;
+
smp_wmb();
guest_hv_clock->version = ++vcpu->hv_clock.version;
@@ -3162,6 +3167,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
u8 pvclock_flags;
bool use_master_clock;
+ /*
+ * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless
+ * explicitly told to use TSC as its clocksource Xen will not set this bit.
+ * This default behaviour led to bugs in some guest kernels which cause
+ * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
+ */
+ bool xen_pvclock_tsc_unstable =
+ ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+
kernel_ns = 0;
host_tsc = 0;
@@ -3239,13 +3253,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.flags = pvclock_flags;
if (vcpu->pv_time.active)
- kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
+ kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
#ifdef CONFIG_KVM_XEN
if (vcpu->xen.vcpu_info_cache.active)
kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
- offsetof(struct compat_vcpu_info, time));
+ offsetof(struct compat_vcpu_info, time),
+ xen_pvclock_tsc_unstable);
if (vcpu->xen.vcpu_time_info_cache.active)
- kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
+ kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
+ xen_pvclock_tsc_unstable);
#endif
kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
return 0;
@@ -4638,7 +4654,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
KVM_XEN_HVM_CONFIG_SHARED_INFO |
KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
- KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+ KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+ KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
if (sched_info_on())
r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
@@ -1162,7 +1162,9 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
{
/* Only some feature flags need to be *enabled* by userspace */
u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
- KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+ KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+ KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+ u32 old_flags;
if (xhc->flags & ~permitted_flags)
return -EINVAL;
@@ -1183,9 +1185,14 @@ int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
static_branch_slow_dec_deferred(&kvm_xen_enabled);
+ old_flags = kvm->arch.xen_hvm_config.flags;
memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
mutex_unlock(&kvm->arch.xen.xen_lock);
+
+ if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
+
return 0;
}
@@ -1282,6 +1282,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
struct kvm_xen_hvm_config {
__u32 flags;