[RFC,08/34] x86/cpu: Add CLFLUSH size helper

Message ID 20240222183937.981D71D9@davehans-spike.ostc.intel.com
State New
Headers
Series x86: Rework system-wide configuration masquerading as per-cpu data |

Commit Message

Dave Hansen Feb. 22, 2024, 6:39 p.m. UTC
  From: Dave Hansen <dave.hansen@linux.intel.com>

This is probably starting to look familiar:  The size of a cacheline is
fundamental system-wide information.  Keeping it per-cpu is just silly.

Introduce a system-wide helper for looking up the cacheline size and
use it.

This does one slightly odd-looking thing: it stops setting
c->x86_clflush_size on all but the boot CPU.  This is functionally OK
because all readers of the secondary CPU values also go away.  It also
makes it explicit that the 'boot_cpu_data' is the one true system-wide
value.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Daniel Vetter <daniel@ffwll.ch>

---

 b/arch/x86/include/asm/processor.h       |    5 +++++
 b/arch/x86/kernel/cpu/centaur.c          |    2 +-
 b/arch/x86/kernel/cpu/common.c           |    9 +++++----
 b/arch/x86/kernel/cpu/intel.c            |    2 +-
 b/arch/x86/kernel/cpu/proc.c             |    2 +-
 b/arch/x86/lib/usercopy_64.c             |    7 +++----
 b/arch/x86/mm/pat/set_memory.c           |    2 +-
 b/arch/x86/pci/common.c                  |    2 +-
 b/drivers/gpu/drm/drm_cache.c            |    4 ++--
 b/drivers/gpu/drm/i915/i915_cmd_parser.c |    3 +--
 b/drivers/gpu/drm/i915/i915_gem.c        |    2 +-
 b/drivers/md/dm-writecache.c             |    2 +-
 12 files changed, 23 insertions(+), 19 deletions(-)
  

Patch

diff -puN arch/x86/include/asm/processor.h~x86_clflush_size-func arch/x86/include/asm/processor.h
--- a/arch/x86/include/asm/processor.h~x86_clflush_size-func	2024-02-22 10:08:52.112596720 -0800
+++ b/arch/x86/include/asm/processor.h	2024-02-22 10:08:52.132597505 -0800
@@ -777,4 +777,9 @@  static inline u8 x86_virt_bits(void)
 	return boot_cpu_data.x86_virt_bits;
 }
 
+static inline u16 x86_clflush_size(void)	/* up to 255 * 8 bytes: too big for u8 */
+{
+	return boot_cpu_data.x86_clflush_size;
+}
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff -puN arch/x86/kernel/cpu/centaur.c~x86_clflush_size-func arch/x86/kernel/cpu/centaur.c
--- a/arch/x86/kernel/cpu/centaur.c~x86_clflush_size-func	2024-02-22 10:08:52.112596720 -0800
+++ b/arch/x86/kernel/cpu/centaur.c	2024-02-22 10:08:52.132597505 -0800
@@ -62,7 +62,7 @@  static void init_c3(struct cpuinfo_x86 *
 		set_cpu_cap(c, X86_FEATURE_3DNOW);
 #endif
 	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
-		c->x86_cache_alignment = c->x86_clflush_size * 2;
+		c->x86_cache_alignment = x86_clflush_size() * 2;
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 	}
 
diff -puN arch/x86/kernel/cpu/common.c~x86_clflush_size-func arch/x86/kernel/cpu/common.c
--- a/arch/x86/kernel/cpu/common.c~x86_clflush_size-func	2024-02-22 10:08:52.112596720 -0800
+++ b/arch/x86/kernel/cpu/common.c	2024-02-22 10:08:52.132597505 -0800
@@ -954,8 +954,9 @@  void cpu_detect(struct cpuinfo_x86 *c)
 		c->x86_stepping	= x86_stepping(tfms);
 
 		if (cap0 & (1<<19)) {
-			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
-			c->x86_cache_alignment = c->x86_clflush_size;
+			if (c == &boot_cpu_data)
+				c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
+			c->x86_cache_alignment = x86_clflush_size();
 		}
 	}
 }
@@ -1123,7 +1124,7 @@  void get_cpu_address_sizes(struct cpuinf
 		}
 	}
 	c->x86_cache_bits = c->x86_phys_bits;
-	c->x86_cache_alignment = c->x86_clflush_size;
+	c->x86_cache_alignment = x86_clflush_size();
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -1831,7 +1832,7 @@  static void identify_cpu(struct cpuinfo_
 	c->x86_phys_bits = 32;
 	c->x86_virt_bits = 32;
 #endif
-	c->x86_cache_alignment = c->x86_clflush_size;
+	c->x86_cache_alignment = x86_clflush_size();
 	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
 #ifdef CONFIG_X86_VMX_FEATURE_NAMES
 	memset(&c->vmx_capability, 0, sizeof(c->vmx_capability));
diff -puN arch/x86/kernel/cpu/intel.c~x86_clflush_size-func arch/x86/kernel/cpu/intel.c
--- a/arch/x86/kernel/cpu/intel.c~x86_clflush_size-func	2024-02-22 10:08:52.116596877 -0800
+++ b/arch/x86/kernel/cpu/intel.c	2024-02-22 10:08:52.132597505 -0800
@@ -653,7 +653,7 @@  static void init_intel(struct cpuinfo_x8
 
 #ifdef CONFIG_X86_64
 	if (c->x86 == 15)
-		c->x86_cache_alignment = c->x86_clflush_size * 2;
+		c->x86_cache_alignment = x86_clflush_size() * 2;
 	if (c->x86 == 6)
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 #else
diff -puN arch/x86/kernel/cpu/proc.c~x86_clflush_size-func arch/x86/kernel/cpu/proc.c
--- a/arch/x86/kernel/cpu/proc.c~x86_clflush_size-func	2024-02-22 10:08:52.116596877 -0800
+++ b/arch/x86/kernel/cpu/proc.c	2024-02-22 10:08:52.132597505 -0800
@@ -130,7 +130,7 @@  static int show_cpuinfo(struct seq_file
 	if (c->x86_tlbsize > 0)
 		seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
 #endif
-	seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
+	seq_printf(m, "clflush size\t: %u\n", x86_clflush_size());
 	seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
 	seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
 		   x86_phys_bits(), x86_virt_bits());
diff -puN arch/x86/lib/usercopy_64.c~x86_clflush_size-func arch/x86/lib/usercopy_64.c
--- a/arch/x86/lib/usercopy_64.c~x86_clflush_size-func	2024-02-22 10:08:52.120597034 -0800
+++ b/arch/x86/lib/usercopy_64.c	2024-02-22 10:08:52.132597505 -0800
@@ -27,13 +27,12 @@ 
  */
 static void clean_cache_range(void *addr, size_t size)
 {
-	u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
-	unsigned long clflush_mask = x86_clflush_size - 1;
+	unsigned long clflush_mask = x86_clflush_size() - 1;
 	void *vend = addr + size;
 	void *p;
 
 	for (p = (void *)((unsigned long)addr & ~clflush_mask);
-	     p < vend; p += x86_clflush_size)
+	     p < vend; p += x86_clflush_size())
 		clwb(p);
 }
 
@@ -65,7 +64,7 @@  long __copy_user_flushcache(void *dst, c
 			clean_cache_range(dst, size);
 	} else {
 		if (!IS_ALIGNED(dest, 8)) {
-			dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
+			dest = ALIGN(dest, x86_clflush_size());
 			clean_cache_range(dst, 1);
 		}
 
diff -puN arch/x86/mm/pat/set_memory.c~x86_clflush_size-func arch/x86/mm/pat/set_memory.c
--- a/arch/x86/mm/pat/set_memory.c~x86_clflush_size-func	2024-02-22 10:08:52.120597034 -0800
+++ b/arch/x86/mm/pat/set_memory.c	2024-02-22 10:08:52.132597505 -0800
@@ -314,7 +314,7 @@  static unsigned long __cpa_addr(struct c
 
 static void clflush_cache_range_opt(void *vaddr, unsigned int size)
 {
-	const unsigned long clflush_size = boot_cpu_data.x86_clflush_size;
+	const unsigned long clflush_size = x86_clflush_size();
 	void *p = (void *)((unsigned long)vaddr & ~(clflush_size - 1));
 	void *vend = vaddr + size;
 
diff -puN arch/x86/pci/common.c~x86_clflush_size-func arch/x86/pci/common.c
--- a/arch/x86/pci/common.c~x86_clflush_size-func	2024-02-22 10:08:52.120597034 -0800
+++ b/arch/x86/pci/common.c	2024-02-22 10:08:52.132597505 -0800
@@ -480,7 +480,7 @@  void pcibios_scan_root(int busnum)
 
 void __init pcibios_set_cache_line_size(void)
 {
-	pci_dfl_cache_line_size = boot_cpu_data.x86_clflush_size >> 2;
+	pci_dfl_cache_line_size = x86_clflush_size() >> 2;
 	printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n",
 		pci_dfl_cache_line_size << 2);
 }
diff -puN drivers/gpu/drm/drm_cache.c~x86_clflush_size-func drivers/gpu/drm/drm_cache.c
--- a/drivers/gpu/drm/drm_cache.c~x86_clflush_size-func	2024-02-22 10:08:52.124597191 -0800
+++ b/drivers/gpu/drm/drm_cache.c	2024-02-22 10:08:52.132597505 -0800
@@ -52,7 +52,7 @@  drm_clflush_page(struct page *page)
 {
 	uint8_t *page_virtual;
 	unsigned int i;
-	const int size = boot_cpu_data.x86_clflush_size;
+	const int size = x86_clflush_size();
 
 	if (unlikely(page == NULL))
 		return;
@@ -160,7 +160,7 @@  drm_clflush_virt_range(void *addr, unsig
 {
 #if defined(CONFIG_X86)
 	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
-		const int size = boot_cpu_data.x86_clflush_size;
+		const int size = x86_clflush_size();
 		void *end = addr + length;
 
 		addr = (void *)(((unsigned long)addr) & -size);
diff -puN drivers/gpu/drm/i915/i915_cmd_parser.c~x86_clflush_size-func drivers/gpu/drm/i915/i915_cmd_parser.c
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c~x86_clflush_size-func	2024-02-22 10:08:52.124597191 -0800
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c	2024-02-22 10:08:52.132597505 -0800
@@ -1203,8 +1203,7 @@  static u32 *copy_batch(struct drm_i915_g
 		 */
 		remain = length;
 		if (dst_needs_clflush & CLFLUSH_BEFORE)
-			remain = round_up(remain,
-					  boot_cpu_data.x86_clflush_size);
+			remain = round_up(remain, x86_clflush_size());
 
 		ptr = dst;
 		x = offset_in_page(offset);
diff -puN drivers/gpu/drm/i915/i915_gem.c~x86_clflush_size-func drivers/gpu/drm/i915/i915_gem.c
--- a/drivers/gpu/drm/i915/i915_gem.c~x86_clflush_size-func	2024-02-22 10:08:52.124597191 -0800
+++ b/drivers/gpu/drm/i915/i915_gem.c	2024-02-22 10:08:52.132597505 -0800
@@ -696,7 +696,7 @@  i915_gem_shmem_pwrite(struct drm_i915_ge
 	 */
 	partial_cacheline_write = 0;
 	if (needs_clflush & CLFLUSH_BEFORE)
-		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
+		partial_cacheline_write = x86_clflush_size() - 1;
 
 	user_data = u64_to_user_ptr(args->data_ptr);
 	remain = args->size;
diff -puN drivers/md/dm-writecache.c~x86_clflush_size-func drivers/md/dm-writecache.c
--- a/drivers/md/dm-writecache.c~x86_clflush_size-func	2024-02-22 10:08:52.128597348 -0800
+++ b/drivers/md/dm-writecache.c	2024-02-22 10:08:52.132597505 -0800
@@ -1229,7 +1229,7 @@  static void memcpy_flushcache_optimized(
 	 */
 #ifdef CONFIG_X86
 	if (static_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
-	    likely(boot_cpu_data.x86_clflush_size == 64) &&
+	    likely(x86_clflush_size() == 64) &&
 	    likely(size >= 768)) {
 		do {
 			memcpy((void *)dest, (void *)source, 64);