[v4] KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT

Message ID 20231101183032.1498211-1-paul@xen.org
State New
Headers
Series [v4] KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT |

Commit Message

Paul Durrant Nov. 1, 2023, 6:30 p.m. UTC
  From: Paul Durrant <pdurrant@amazon.com>

Unless explicitly told to do so (by passing 'clocksource=tsc' and
'tsc=stable:socket', and then jumping through some hoops concerning
potential CPU hotplug) Xen will never use TSC as its clocksource.
Hence, by default, a Xen guest will not see PVCLOCK_TSC_STABLE_BIT set
in either the primary or secondary pvclock memory areas. This has
led to bugs in some guest kernels which only become evident if
PVCLOCK_TSC_STABLE_BIT *is* set in the pvclocks. Hence, to support
such guests, give the VMM a new Xen HVM config flag to tell KVM to
forcibly clear the bit in the Xen pvclocks.

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---

v4:
 - Re-base.
 - Re-work 'update_pvclock' test as requested.

v3:
 - Moved clearing of PVCLOCK_TSC_STABLE_BIT the right side of the
   memcpy().
 - Added an all-vCPUs KVM_REQ_CLOCK_UPDATE when the HVM config
   flag is changed.
---
 Documentation/virt/kvm/api.rst |  6 ++++++
 arch/x86/kvm/x86.c             | 27 ++++++++++++++++++++++-----
 arch/x86/kvm/xen.c             |  9 ++++++++-
 include/uapi/linux/kvm.h       |  1 +
 4 files changed, 37 insertions(+), 6 deletions(-)


base-commit: 35dcbd9e47035f98f3910ae420bf10892c9bdc99
  

Comments

kernel test robot Nov. 2, 2023, 4:03 a.m. UTC | #1
Hi Paul,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 35dcbd9e47035f98f3910ae420bf10892c9bdc99]

url:    https://github.com/intel-lab-lkp/linux/commits/Paul-Durrant/KVM-x86-xen-add-an-override-for-PVCLOCK_TSC_STABLE_BIT/20231102-034122
base:   35dcbd9e47035f98f3910ae420bf10892c9bdc99
patch link:    https://lore.kernel.org/r/20231101183032.1498211-1-paul%40xen.org
patch subject: [PATCH v4] KVM x86/xen: add an override for PVCLOCK_TSC_STABLE_BIT
config: i386-randconfig-013-20231102 (https://download.01.org/0day-ci/archive/20231102/202311021159.ppYESBYx-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231102/202311021159.ppYESBYx-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311021159.ppYESBYx-lkp@intel.com/

All warnings (new ones prefixed by >>):

   arch/x86/kvm/x86.c: In function 'kvm_guest_time_update':
>> arch/x86/kvm/x86.c:3176:14: warning: unused variable 'xen_pvclock_tsc_unstable' [-Wunused-variable]
    3176 |         bool xen_pvclock_tsc_unstable =
         |              ^~~~~~~~~~~~~~~~~~~~~~~~


vim +/xen_pvclock_tsc_unstable +3176 arch/x86/kvm/x86.c

  3158	
  3159	static int kvm_guest_time_update(struct kvm_vcpu *v)
  3160	{
  3161		unsigned long flags, tgt_tsc_khz;
  3162		unsigned seq;
  3163		struct kvm_vcpu_arch *vcpu = &v->arch;
  3164		struct kvm_arch *ka = &v->kvm->arch;
  3165		s64 kernel_ns;
  3166		u64 tsc_timestamp, host_tsc;
  3167		u8 pvclock_flags;
  3168		bool use_master_clock;
  3169	
  3170		/*
  3171		 * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless
  3172		 * explicitly told to use TSC as its clocksource Xen will not set this bit.
  3173		 * This default behaviour led to bugs in some guest kernels which cause
  3174		 * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
  3175		 */
> 3176		bool xen_pvclock_tsc_unstable =
  3177			ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
  3178	
  3179		kernel_ns = 0;
  3180		host_tsc = 0;
  3181	
  3182		/*
  3183		 * If the host uses TSC clock, then passthrough TSC as stable
  3184		 * to the guest.
  3185		 */
  3186		do {
  3187			seq = read_seqcount_begin(&ka->pvclock_sc);
  3188			use_master_clock = ka->use_master_clock;
  3189			if (use_master_clock) {
  3190				host_tsc = ka->master_cycle_now;
  3191				kernel_ns = ka->master_kernel_ns;
  3192			}
  3193		} while (read_seqcount_retry(&ka->pvclock_sc, seq));
  3194	
  3195		/* Keep irq disabled to prevent changes to the clock */
  3196		local_irq_save(flags);
  3197		tgt_tsc_khz = get_cpu_tsc_khz();
  3198		if (unlikely(tgt_tsc_khz == 0)) {
  3199			local_irq_restore(flags);
  3200			kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
  3201			return 1;
  3202		}
  3203		if (!use_master_clock) {
  3204			host_tsc = rdtsc();
  3205			kernel_ns = get_kvmclock_base_ns();
  3206		}
  3207	
  3208		tsc_timestamp = kvm_read_l1_tsc(v, host_tsc);
  3209	
  3210		/*
  3211		 * We may have to catch up the TSC to match elapsed wall clock
  3212		 * time for two reasons, even if kvmclock is used.
  3213		 *   1) CPU could have been running below the maximum TSC rate
  3214		 *   2) Broken TSC compensation resets the base at each VCPU
  3215		 *      entry to avoid unknown leaps of TSC even when running
  3216		 *      again on the same CPU.  This may cause apparent elapsed
  3217		 *      time to disappear, and the guest to stand still or run
  3218		 *	very slowly.
  3219		 */
  3220		if (vcpu->tsc_catchup) {
  3221			u64 tsc = compute_guest_tsc(v, kernel_ns);
  3222			if (tsc > tsc_timestamp) {
  3223				adjust_tsc_offset_guest(v, tsc - tsc_timestamp);
  3224				tsc_timestamp = tsc;
  3225			}
  3226		}
  3227	
  3228		local_irq_restore(flags);
  3229	
  3230		/* With all the info we got, fill in the values */
  3231	
  3232		if (kvm_caps.has_tsc_control)
  3233			tgt_tsc_khz = kvm_scale_tsc(tgt_tsc_khz,
  3234						    v->arch.l1_tsc_scaling_ratio);
  3235	
  3236		if (unlikely(vcpu->hw_tsc_khz != tgt_tsc_khz)) {
  3237			kvm_get_time_scale(NSEC_PER_SEC, tgt_tsc_khz * 1000LL,
  3238					   &vcpu->hv_clock.tsc_shift,
  3239					   &vcpu->hv_clock.tsc_to_system_mul);
  3240			vcpu->hw_tsc_khz = tgt_tsc_khz;
  3241			kvm_xen_update_tsc_info(v);
  3242		}
  3243	
  3244		vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
  3245		vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
  3246		vcpu->last_guest_tsc = tsc_timestamp;
  3247	
  3248		/* If the host uses TSC clocksource, then it is stable */
  3249		pvclock_flags = 0;
  3250		if (use_master_clock)
  3251			pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
  3252	
  3253		vcpu->hv_clock.flags = pvclock_flags;
  3254	
  3255		if (vcpu->pv_time.active)
  3256			kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
  3257	#ifdef CONFIG_KVM_XEN
  3258		if (vcpu->xen.vcpu_info_cache.active)
  3259			kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
  3260						offsetof(struct compat_vcpu_info, time),
  3261						xen_pvclock_tsc_unstable);
  3262		if (vcpu->xen.vcpu_time_info_cache.active)
  3263			kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
  3264						xen_pvclock_tsc_unstable);
  3265	#endif
  3266		kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
  3267		return 0;
  3268	}
  3269
  

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 67347d827242..880b929a5cb1 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8270,6 +8270,7 @@  PVHVM guests. Valid flags are::
   #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL		(1 << 4)
   #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND		(1 << 5)
   #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG	(1 << 6)
+  #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE	(1 << 7)
 
 The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
 ioctl is available, for the guest to set its hypercall page.
@@ -8313,6 +8314,11 @@  behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
 specifically enabled (by the guest making the hypercall, causing the VMM
 to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).
 
+The KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag indicates that KVM supports
+clearing the PVCLOCK_TSC_STABLE_BIT flag in Xen pvclock sources. This will be
+done when the KVM_CAP_XEN_HVM ioctl sets the
+KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE flag.
+
 8.31 KVM_CAP_PPC_MULTITCE
 -------------------------
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2c924075f6f1..e43449382cba 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3104,7 +3104,8 @@  u64 get_kvmclock_ns(struct kvm *kvm)
 
 static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 				    struct gfn_to_pfn_cache *gpc,
-				    unsigned int offset)
+				    unsigned int offset,
+				    bool force_tsc_unstable)
 {
 	struct kvm_vcpu_arch *vcpu = &v->arch;
 	struct pvclock_vcpu_time_info *guest_hv_clock;
@@ -3141,6 +3142,10 @@  static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 	}
 
 	memcpy(guest_hv_clock, &vcpu->hv_clock, sizeof(*guest_hv_clock));
+
+	if (force_tsc_unstable)
+		guest_hv_clock->flags &= ~PVCLOCK_TSC_STABLE_BIT;
+
 	smp_wmb();
 
 	guest_hv_clock->version = ++vcpu->hv_clock.version;
@@ -3162,6 +3167,15 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	u8 pvclock_flags;
 	bool use_master_clock;
 
+	/*
+	 * For Xen guests we may need to override PVCLOCK_TSC_STABLE_BIT as unless
+	 * explicitly told to use TSC as its clocksource Xen will not set this bit.
+	 * This default behaviour led to bugs in some guest kernels which cause
+	 * problems if they observe PVCLOCK_TSC_STABLE_BIT in the pvclock flags.
+	 */
+	bool xen_pvclock_tsc_unstable =
+		ka->xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+
 	kernel_ns = 0;
 	host_tsc = 0;
 
@@ -3239,13 +3253,15 @@  static int kvm_guest_time_update(struct kvm_vcpu *v)
 	vcpu->hv_clock.flags = pvclock_flags;
 
 	if (vcpu->pv_time.active)
-		kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0);
+		kvm_setup_guest_pvclock(v, &vcpu->pv_time, 0, false);
 #ifdef CONFIG_KVM_XEN
 	if (vcpu->xen.vcpu_info_cache.active)
 		kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_info_cache,
-					offsetof(struct compat_vcpu_info, time));
+					offsetof(struct compat_vcpu_info, time),
+					xen_pvclock_tsc_unstable);
 	if (vcpu->xen.vcpu_time_info_cache.active)
-		kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0);
+		kvm_setup_guest_pvclock(v, &vcpu->xen.vcpu_time_info_cache, 0,
+					xen_pvclock_tsc_unstable);
 #endif
 	kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
 	return 0;
@@ -4638,7 +4654,8 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		    KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
 		    KVM_XEN_HVM_CONFIG_SHARED_INFO |
 		    KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
-		    KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+		    KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+		    KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
 		if (sched_info_on())
 			r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
 			     KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index e53fad915a62..e43948b87f94 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1162,7 +1162,9 @@  int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 {
 	/* Only some feature flags need to be *enabled* by userspace */
 	u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
-		KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+		KVM_XEN_HVM_CONFIG_EVTCHN_SEND |
+		KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE;
+	u32 old_flags;
 
 	if (xhc->flags & ~permitted_flags)
 		return -EINVAL;
@@ -1183,9 +1185,14 @@  int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
 	else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
 		static_branch_slow_dec_deferred(&kvm_xen_enabled);
 
+	old_flags = kvm->arch.xen_hvm_config.flags;
 	memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
 
 	mutex_unlock(&kvm->arch.xen.xen_lock);
+
+	if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE)
+		kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
+
 	return 0;
 }
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 13065dd96132..e21b53e8358d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1282,6 +1282,7 @@  struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND		(1 << 5)
 #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG	(1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE	(1 << 7)
 
 struct kvm_xen_hvm_config {
 	__u32 flags;