Kernel TLB invalidation IPIs are a common source of interference on
NOHZ_FULL CPUs. Since NOHZ_FULL CPUs executing in userspace do not access
any kernel addresses, these invalidations do not need to happen immediately
and can be deferred until the next user->kernel transition.
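
As a rough sketch of the idea (ct_set_cpu_work() is the queueing helper
introduced earlier in this series, its exact signature is assumed here; the
caller below is purely illustrative and not part of this patch):

  #include <linux/context_tracking_work.h>
  #include <linux/smp.h>
  #include <asm/tlbflush.h>

  /* Illustrative IPI callback for the non-deferred case. */
  static void do_flush_tlb_all_ipi(void *unused)
  {
  	__flush_tlb_all();
  }

  static void flush_kernel_tlb_on_cpu(int cpu)
  {
  	/*
  	 * A NOHZ_FULL CPU executing in userspace touches no kernel
  	 * addresses, so queue the invalidation as context tracking work
  	 * to be run at its next user->kernel transition...
  	 */
  	if (ct_set_cpu_work(cpu, CONTEXT_WORK_TLBI))
  		return;

  	/* ...otherwise the CPU is in the kernel, IPI it as usual. */
  	smp_call_function_single(cpu, do_flush_tlb_all_ipi, NULL, 1);
  }
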
Rather than make __flush_tlb_all() noinstr, add a minimal noinstr
variant that doesn't try to leverage INVPCID.
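
For context, the existing instrumented global flush prefers INVPCID when it
is available; roughly (paraphrased from arch/x86/mm/tlb.c, details may
differ between kernel versions):

  /* Paraphrased for illustration, not part of this patch. */
  void native_flush_tlb_global(void)
  {
  	unsigned long flags;

  	if (static_cpu_has(X86_FEATURE_INVPCID)) {
  		/* Works with CR4.PCIDE=0 or 1, and avoids the CR4 dance. */
  		invpcid_flush_all();
  		return;
  	}

  	/* Fall back to a CR4 read-modify-write with IRQs disabled. */
  	raw_local_irq_save(flags);
  	__native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
  	raw_local_irq_restore(flags);
  }

The new variant simply never takes the INVPCID branch and sticks to the raw
CR4 (or, failing PGE, CR3) writes.
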
FIXME: not fully noinstr compliant
XXX: same issue as with instruction patching: when do we access data that
should be invalidated?
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
 arch/x86/include/asm/context_tracking_work.h |  4 ++++
 arch/x86/include/asm/tlbflush.h              |  1 +
 arch/x86/mm/tlb.c                            | 17 +++++++++++++++++
 include/linux/context_tracking_state.h       |  4 ++++
 include/linux/context_tracking_work.h        |  2 ++
 5 files changed, 28 insertions(+)
--- a/arch/x86/include/asm/context_tracking_work.h
+++ b/arch/x86/include/asm/context_tracking_work.h
@@ -3,6 +3,7 @@
#define _ASM_X86_CONTEXT_TRACKING_WORK_H
#include <asm/sync_core.h>
+#include <asm/tlbflush.h>
static __always_inline void arch_context_tracking_work(int work)
{
@@ -10,6 +11,9 @@ static __always_inline void arch_context_tracking_work(int work)
case CONTEXT_WORK_SYNC:
sync_core();
break;
+ case CONTEXT_WORK_TLBI:
+ __flush_tlb_all_noinstr();
+ break;
}
}
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -17,6 +17,7 @@
DECLARE_PER_CPU(u64, tlbstate_untag_mask);
void __flush_tlb_all(void);
+void noinstr __flush_tlb_all_noinstr(void);
#define TLB_FLUSH_ALL -1UL
#define TLB_GENERATION_INVALID 0
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1237,6 +1237,23 @@ void __flush_tlb_all(void)
}
EXPORT_SYMBOL_GPL(__flush_tlb_all);
+void noinstr __flush_tlb_all_noinstr(void)
+{
+ /*
+ * This is for invocation in early entry code that cannot be
+ * instrumented. A RMW to CR4 works for most cases, but relies on
+ * being able to flip either of the PGE or PCIDE bits. Flipping CR4.PCIDE
+ * would require also resetting CR3.PCID, so just try with CR4.PGE, else
+ * do the CR3 write.
+ *
+ * TODO: paravirt
+ */
+ if (cpu_feature_enabled(X86_FEATURE_PGE))
+ __native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
+ else
+ flush_tlb_local();
+}
+
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
struct flush_tlb_info *info;
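
For reference, the CR4 path used above boils down to toggling CR4.PGE, which
flushes the entire TLB, global entries included. A paraphrased sketch of the
existing helper (not part of this patch):

  static inline void __native_tlb_flush_global(unsigned long cr4)
  {
  	/* Clearing then restoring CR4.PGE flushes all TLB entries. */
  	native_write_cr4(cr4 ^ X86_CR4_PGE);
  	native_write_cr4(cr4);
  }
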
--- a/include/linux/context_tracking_state.h
+++ b/include/linux/context_tracking_state.h
@@ -62,6 +62,10 @@ enum ctx_state {
#define RCU_DYNTICKS_END (CT_STATE_SIZE - 1)
#define RCU_DYNTICKS_IDX BIT(RCU_DYNTICKS_START)
+/*
+ * When CONFIG_CONTEXT_TRACKING_WORK=n, _END is 1 behind _START, so the
+ * CONTEXT_WORK size computation below comes out to 0, which is what we want!
+ */
#define CONTEXT_WORK_START (CONTEXT_STATE_END + 1)
#define CONTEXT_WORK_END (RCU_DYNTICKS_START - 1)
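
To make the comment above concrete, here is a tiny standalone toy model of
that arithmetic (the numeric values and the CONTEXT_WORK_SIZE name are made
up for illustration; the *_START/*_END names and the "1 behind" relationship
come from the patch):

  #include <assert.h>

  /* Toy values mimicking the CONFIG_CONTEXT_TRACKING_WORK=n layout. */
  #define CONTEXT_STATE_END	3
  #define CONTEXT_WORK_START	(CONTEXT_STATE_END + 1)		/* 4 */
  #define RCU_DYNTICKS_START	CONTEXT_WORK_START		/* no work bits reserved */
  #define CONTEXT_WORK_END	(RCU_DYNTICKS_START - 1)	/* 3, i.e. _START - 1 */
  #define CONTEXT_WORK_SIZE	(CONTEXT_WORK_END - CONTEXT_WORK_START + 1)

  int main(void)
  {
  	/* _END one behind _START => the work area occupies zero bits. */
  	assert(CONTEXT_WORK_SIZE == 0);
  	return 0;
  }
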
--- a/include/linux/context_tracking_work.h
+++ b/include/linux/context_tracking_work.h
@@ -6,11 +6,13 @@
enum {
CONTEXT_WORK_SYNC_OFFSET,
+ CONTEXT_WORK_TLBI_OFFSET,
CONTEXT_WORK_MAX_OFFSET
};
enum ct_work {
CONTEXT_WORK_SYNC = BIT(CONTEXT_WORK_SYNC_OFFSET),
+ CONTEXT_WORK_TLBI = BIT(CONTEXT_WORK_TLBI_OFFSET),
CONTEXT_WORK_MAX = BIT(CONTEXT_WORK_MAX_OFFSET)
};