[tip: x86/urgent] x86/bugs: Add asm helpers for executing VERW

Message ID 170839092792.398.3678407222202963581.tip-bot2@tip-bot2

Commit Message

tip-bot2 for Pawan Gupta Feb. 20, 2024, 1:02 a.m. UTC
  The following commit has been merged into the x86/urgent branch of tip:

Commit-ID:     baf8361e54550a48a7087b603313ad013cc13386
Gitweb:        https://git.kernel.org/tip/baf8361e54550a48a7087b603313ad013cc13386
Author:        Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
AuthorDate:    Tue, 13 Feb 2024 18:21:35 -08:00
Committer:     Dave Hansen <dave.hansen@linux.intel.com>
CommitterDate: Mon, 19 Feb 2024 16:31:33 -08:00

x86/bugs: Add asm helpers for executing VERW

MDS mitigation requires clearing the CPU buffers before returning to
userspace. This needs to be done late in the exit-to-user path. The current
location of VERW leaves open the possibility of kernel data ending up in
the CPU buffers via memory accesses done after VERW, such as:

  1. Kernel data accessed by an NMI between VERW and return-to-user can
     remain in CPU buffers since NMI returning to kernel does not
     execute VERW to clear CPU buffers.
  2. Alyssa reported that after VERW is executed,
     CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
     call. Memory accesses during stack scrubbing can move kernel stack
     contents into CPU buffers.
  3. When caller-saved registers are restored after a return from the
     function executing VERW, the kernel stack accesses can remain in
     the CPU buffers (since they occur after VERW).

To fix this, VERW needs to be moved very late in the exit-to-user path.

In preparation for moving VERW to entry/exit asm code, create macros
that can be used in asm. Also make VERW patching depend on a new feature
flag X86_FEATURE_CLEAR_CPU_BUF.

Reported-by: Alyssa Milburn <alyssa.milburn@intel.com>
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com
---
 arch/x86/entry/entry.S               | 23 +++++++++++++++++++++++
 arch/x86/include/asm/cpufeatures.h   |  2 +-
 arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)
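
The macro added here gains its call sites only in later patches of the
series. As a minimal sketch of the intended placement described in the
commit message above (the surrounding instructions and the exact return
path are illustrative assumptions, not part of this patch):

	/*
	 * Hypothetical 64-bit syscall-return tail: CLEAR_CPU_BUFFERS is
	 * meant to run after the last kernel memory access, immediately
	 * before the transition to user mode.
	 */
	swapgs
	CLEAR_CPU_BUFFERS	/* VERW via alternative; patched to nothing when
				 * X86_FEATURE_CLEAR_CPU_BUF is not set */
	sysretq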
  

Comments

Nikolay Borisov Feb. 26, 2024, 7:17 a.m. UTC | #1
On 20.02.24 at 3:02, tip-bot2 for Pawan Gupta wrote:
> The following commit has been merged into the x86/urgent branch of tip:
> 
> Commit-ID:     baf8361e54550a48a7087b603313ad013cc13386
> Gitweb:        https://git.kernel.org/tip/baf8361e54550a48a7087b603313ad013cc13386
> Author:        Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> AuthorDate:    Tue, 13 Feb 2024 18:21:35 -08:00
> Committer:     Dave Hansen <dave.hansen@linux.intel.com>
> CommitterDate: Mon, 19 Feb 2024 16:31:33 -08:00
> 
> x86/bugs: Add asm helpers for executing VERW
> 
> MDS mitigation requires clearing the CPU buffers before returning to
> user. This needs to be done late in the exit-to-user path. Current
> location of VERW leaves a possibility of kernel data ending up in CPU
> buffers for memory accesses done after VERW such as:
> 
>    1. Kernel data accessed by an NMI between VERW and return-to-user can
>       remain in CPU buffers since NMI returning to kernel does not
>       execute VERW to clear CPU buffers.
>    2. Alyssa reported that after VERW is executed,
>       CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
>       call. Memory accesses during stack scrubbing can move kernel stack
>       contents into CPU buffers.
>    3. When caller saved registers are restored after a return from
>       function executing VERW, the kernel stack accesses can remain in
>       CPU buffers(since they occur after VERW).
> 
> To fix this VERW needs to be moved very late in exit-to-user path.
> 
> In preparation for moving VERW to entry/exit asm code, create macros
> that can be used in asm. Also make VERW patching depend on a new feature
> flag X86_FEATURE_CLEAR_CPU_BUF.
> 
> Reported-by: Alyssa Milburn <alyssa.milburn@intel.com>
> Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
> Link: https://lore.kernel.org/all/20240213-delay-verw-v8-1-a6216d83edb7%40linux.intel.com
> ---
>   arch/x86/entry/entry.S               | 23 +++++++++++++++++++++++
>   arch/x86/include/asm/cpufeatures.h   |  2 +-
>   arch/x86/include/asm/nospec-branch.h | 13 +++++++++++++
>   3 files changed, 37 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
> index 8c8d38f..0033790 100644
> --- a/arch/x86/entry/entry.S
> +++ b/arch/x86/entry/entry.S
> @@ -6,6 +6,9 @@
>   #include <linux/export.h>
>   #include <linux/linkage.h>
>   #include <asm/msr-index.h>
> +#include <asm/unwind_hints.h>
> +#include <asm/segment.h>
> +#include <asm/cache.h>
>   
>   .pushsection .noinstr.text, "ax"
>   
> @@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
>   EXPORT_SYMBOL_GPL(entry_ibpb);
>   
>   .popsection
> +
> +/*
> + * Define the VERW operand that is disguised as entry code so that
> + * it can be referenced with KPTI enabled. This ensure VERW can be
> + * used late in exit-to-user path after page tables are switched.
> + */
> +.pushsection .entry.text, "ax"
> +
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_START_NOALIGN(mds_verw_sel)
> +	UNWIND_HINT_UNDEFINED
> +	ANNOTATE_NOENDBR
> +	.word __KERNEL_DS
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_END(mds_verw_sel);
> +/* For KVM */
> +EXPORT_SYMBOL_GPL(mds_verw_sel);
> +
> +.popsection
> +
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index fdf723b..2b62cdd 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -95,7 +95,7 @@
>   #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
>   #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
>   #define X86_FEATURE_AMD_LBR_V2		( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
> -/* FREE, was #define X86_FEATURE_LFENCE_RDTSC		( 3*32+18) "" LFENCE synchronizes RDTSC */
> +#define X86_FEATURE_CLEAR_CPU_BUF	( 3*32+18) /* "" Clear CPU buffers using VERW */
>   #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
>   #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
>   #define X86_FEATURE_ALWAYS		( 3*32+21) /* "" Always-present feature */
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index 262e655..077083e 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -315,6 +315,17 @@
>   #endif
>   .endm
>   
> +/*
> + * Macro to execute VERW instruction that mitigate transient data sampling
> + * attacks such as MDS. On affected systems a microcode update overloaded VERW
> + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
> + *
> + * Note: Only the memory operand variant of VERW clears the CPU buffers.
> + */
> +.macro CLEAR_CPU_BUFFERS
> +	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF

Any particular reason why this uses RIP-relative vs an absolute addressing
mode? I know in our private exchange you said there is no significance,
but, for example, older kernels are missing relocation support in
alternatives. This can of course be worked around by slightly changing the
logic of the macro, which means different kernels will have slightly
different macros. Relocation support landed in commit
270a69c4485d7d07516d058bcc0473c90ee22185 (v6.5).

> +.endm
> +
>   #else /* __ASSEMBLY__ */
>   
>   #define ANNOTATE_RETPOLINE_SAFE					\
> @@ -536,6 +547,8 @@ DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
>   
>   DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
>   
> +extern u16 mds_verw_sel;
> +
>   #include <asm/segment.h>
>   
>   /**
>
  
Pawan Gupta Feb. 26, 2024, 10:10 p.m. UTC | #2
On Mon, Feb 26, 2024 at 09:17:30AM +0200, Nikolay Borisov wrote:
> > diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> > index 262e655..077083e 100644
> > --- a/arch/x86/include/asm/nospec-branch.h
> > +++ b/arch/x86/include/asm/nospec-branch.h
> > @@ -315,6 +315,17 @@
> >   #endif
> >   .endm
> > +/*
> > + * Macro to execute VERW instruction that mitigate transient data sampling
> > + * attacks such as MDS. On affected systems a microcode update overloaded VERW
> > + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
> > + *
> > + * Note: Only the memory operand variant of VERW clears the CPU buffers.
> > + */
> > +.macro CLEAR_CPU_BUFFERS
> > +	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
> 
> Any particular reason why this uses RIP-relative vs an absolute address
> mode?

Early versions of the series had the VERW arg pointing to the macro
itself; that is why relative addressing was used. That got changed in a
later version, with all VERW sites pointing to a single memory location.

> I know in our private exchange you said there is no significance but
> for example older kernels have a missing relocation support in alternatives.
> This of course can be worked around by slightly changing the logic of the
> macro which means different kernels will have slightly different macros.

Do you anticipate a problem with that? If yes, I can send a patch to use
fixed addressing in upstream as well.
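
To make the addressing question concrete, this is roughly what the two
forms of the VERW operand look like on 64-bit once the macro expands
(an illustrative sketch assuming _ASM_RIP() expands to RIP-relative
addressing, not a proposed patch):

	/* RIP-relative form in the merged patch: the displacement depends on
	 * where the instruction ends up, so the alternatives code must be
	 * able to fix up the relocation when copying it into place. */
	verw	mds_verw_sel(%rip)

	/* Absolute form: the encoded address does not depend on the
	 * instruction's location, so no relocation fixup is needed when the
	 * alternative is applied. */
	verw	mds_verw_sel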
  
Nikolay Borisov Feb. 26, 2024, 10:20 p.m. UTC | #3
On 27.02.24 at 0:10, Pawan Gupta wrote:
> On Mon, Feb 26, 2024 at 09:17:30AM +0200, Nikolay Borisov wrote:
>>> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
>>> index 262e655..077083e 100644
>>> --- a/arch/x86/include/asm/nospec-branch.h
>>> +++ b/arch/x86/include/asm/nospec-branch.h
>>> @@ -315,6 +315,17 @@
>>>    #endif
>>>    .endm
>>> +/*
>>> + * Macro to execute VERW instruction that mitigate transient data sampling
>>> + * attacks such as MDS. On affected systems a microcode update overloaded VERW
>>> + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
>>> + *
>>> + * Note: Only the memory operand variant of VERW clears the CPU buffers.
>>> + */
>>> +.macro CLEAR_CPU_BUFFERS
>>> +	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
>>
>> Any particular reason why this uses RIP-relative vs an absolute address
>> mode?
> 
> Early versions of the series had the VERW arg pointing to the macro
> itself, that is why relative addressing was used. That got changed in a
> later version with all VERW sites pointing to a single memory location.
> 
>> I know in our private exchange you said there is no significance but
>> for example older kernels have a missing relocation support in alternatives.
>> This of course can be worked around by slightly changing the logic of the
>> macro which means different kernels will have slightly different macros.
> 
> Do you anticipate a problem with that? If yes, I can send a patch to use
> fixed addressing in upstream as well.

I experienced crashes on older kernels before realizing that the
relocation wasn't resolved correctly by the alternatives framework.
Instead I simply changed the macro to jmp 1f, where the next instruction
is the verw (I did send a backport for 5.4), and it works. Recently
there's been a push to make as much of the kernel assembly as possible
PIC, so having RIP-relative addressing helps. Whether that makes any
material difference, I cannot say.

Here's my backport version for reference:

https://lore.kernel.org/stable/20240226122237.198921-3-nik.borisov@suse.com/
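
For readers following the backport discussion, a rough sketch of the
jmp-based variant described above: the VERW and its RIP-relative operand
stay outside the ALTERNATIVE text, so older alternatives code never has to
relocate anything. Label naming here is illustrative; the linked 5.4
backport is the authoritative form.

	.macro CLEAR_CPU_BUFFERS
		/* With X86_FEATURE_CLEAR_CPU_BUF set, the jmp is patched out
		 * (replaced by nops) and the verw below executes; without it,
		 * the jmp skips the verw. The verw is assembled in place, so
		 * its RIP-relative operand needs no fixup. */
		ALTERNATIVE "jmp .Lskip_verw_\@", "", X86_FEATURE_CLEAR_CPU_BUF
		verw	_ASM_RIP(mds_verw_sel)
	.Lskip_verw_\@:
	.endm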
  
Pawan Gupta Feb. 26, 2024, 10:37 p.m. UTC | #4
On Tue, Feb 27, 2024 at 12:20:03AM +0200, Nikolay Borisov wrote:
> 
> 
> On 27.02.24 г. 0:10 ч., Pawan Gupta wrote:
> > On Mon, Feb 26, 2024 at 09:17:30AM +0200, Nikolay Borisov wrote:
> > > > diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> > > > index 262e655..077083e 100644
> > > > --- a/arch/x86/include/asm/nospec-branch.h
> > > > +++ b/arch/x86/include/asm/nospec-branch.h
> > > > @@ -315,6 +315,17 @@
> > > >    #endif
> > > >    .endm
> > > > +/*
> > > > + * Macro to execute VERW instruction that mitigate transient data sampling
> > > > + * attacks such as MDS. On affected systems a microcode update overloaded VERW
> > > > + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
> > > > + *
> > > > + * Note: Only the memory operand variant of VERW clears the CPU buffers.
> > > > + */
> > > > +.macro CLEAR_CPU_BUFFERS
> > > > +	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
> > > 
> > > Any particular reason why this uses RIP-relative vs an absolute address
> > > mode?
> > 
> > Early versions of the series had the VERW arg pointing to the macro
> > itself, that is why relative addressing was used. That got changed in a
> > later version with all VERW sites pointing to a single memory location.
> > 
> > > I know in our private exchange you said there is no significance but
> > > for example older kernels have a missing relocation support in alternatives.
> > > This of course can be worked around by slightly changing the logic of the
> > > macro which means different kernels will have slightly different macros.
> > 
> > Do you anticipate a problem with that? If yes, I can send a patch to use
> > fixed addressing in upstream as well.
> 
> I experienced crashes on older kernels before realizing that the relocation
> wasn't resolved correctly by the alternative framework. Instead i simply
> changed the macro to jmp 1f, where the next instruction is the verw ( I did
> send a backport for 5.4) and it works. Recently there's been a push to make
> as much of the kernel assembly as possible PIC so having a rip-relative
> addressing helps. Whether that makes any material difference - I cannot say.

Ok, sending the patch.

> Here's my backport version for reference:
> 
> https://lore.kernel.org/stable/20240226122237.198921-3-nik.borisov@suse.com/

Below should also solve the problem with less churn:

---
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 2aa52cab1e46..ab19c7f1167b 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -323,7 +323,7 @@
  * Note: Only the memory operand variant of VERW clears the CPU buffers.
  */
 .macro CLEAR_CPU_BUFFERS
-	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+	ALTERNATIVE "", __stringify(verw mds_verw_sel), X86_FEATURE_CLEAR_CPU_BUF
 .endm
 
 #else /* __ASSEMBLY__ */
  

Patch

diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 8c8d38f..0033790 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@ 
 #include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
 
 .pushsection .noinstr.text, "ax"
 
@@ -20,3 +23,23 @@  SYM_FUNC_END(entry_ibpb)
 EXPORT_SYMBOL_GPL(entry_ibpb);
 
 .popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensure VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+	UNWIND_HINT_UNDEFINED
+	ANNOTATE_NOENDBR
+	.word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index fdf723b..2b62cdd 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -95,7 +95,7 @@ 
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
 #define X86_FEATURE_AMD_LBR_V2		( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC		( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF	( 3*32+18) /* "" Clear CPU buffers using VERW */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_ALWAYS		( 3*32+21) /* "" Always-present feature */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 262e655..077083e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -315,6 +315,17 @@ 
 #endif
 .endm
 
+/*
+ * Macro to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+	ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE					\
@@ -536,6 +547,8 @@  DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
 
 DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
 
+extern u16 mds_verw_sel;
+
 #include <asm/segment.h>
 
 /**