[v4,1/6] x86/bugs: Add asm helpers for executing VERW

Message ID 20231027-delay-verw-v4-1-9a3622d4bcf7@linux.intel.com
State New
Headers
Series Delay VERW |

Commit Message

Pawan Gupta Oct. 27, 2023, 2:38 p.m. UTC
  MDS mitigation requires clearing the CPU buffers before returning to
user space. This needs to be done late in the exit-to-user path. The
current location of VERW leaves a possibility of kernel data ending up in
CPU buffers for memory accesses done after VERW, such as:

  1. Kernel data accessed by an NMI between VERW and return-to-user can
     remain in CPU buffers (since an NMI returning to the kernel does not
     execute VERW to clear CPU buffers).
  2. Alyssa reported that after VERW is executed,
     CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
     call. Memory accesses during stack scrubbing can move kernel stack
     contents into CPU buffers.
  3. When caller-saved registers are restored after a return from the
     function executing VERW, the kernel stack accesses can remain in
     CPU buffers (since they occur after VERW).

To fix this, VERW needs to be moved very late in the exit-to-user path.

In preparation for moving VERW to entry/exit asm code, create macros
that can be used in asm. Also add a new feature flag,
X86_FEATURE_CLEAR_CPU_BUF; the VERW in these macros is patched in via
ALTERNATIVE only when this flag is set.

Reported-by: Alyssa Milburn <alyssa.milburn@intel.com>
Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
---
 arch/x86/entry/entry.S               | 17 +++++++++++++++++
 arch/x86/include/asm/cpufeatures.h   |  2 +-
 arch/x86/include/asm/nospec-branch.h | 15 +++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)
  

Comments

Borislav Petkov Oct. 27, 2023, 3:32 p.m. UTC | #1
On Fri, Oct 27, 2023 at 07:38:40AM -0700, Pawan Gupta wrote:
> MDS mitigation requires clearing the CPU buffers before returning to
> user. This needs to be done late in the exit-to-user path. Current
> location of VERW leaves a possibility of kernel data ending up in CPU
> buffers for memory accesses done after VERW such as:
> 
>   1. Kernel data accessed by an NMI between VERW and return-to-user can
>      remain in CPU buffers ( since NMI returning to kernel does not

Some leftover '('

>      execute VERW to clear CPU buffers.
>   2. Alyssa reported that after VERW is executed,
>      CONFIG_GCC_PLUGIN_STACKLEAK=y scrubs the stack used by a system
>      call. Memory accesses during stack scrubbing can move kernel stack
>      contents into CPU buffers.
>   3. When caller saved registers are restored after a return from
>      function executing VERW, the kernel stack accesses can remain in
>      CPU buffers(since they occur after VERW).
> 
> To fix this VERW needs to be moved very late in exit-to-user path.
> 
> In preparation for moving VERW to entry/exit asm code, create macros
> that can be used in asm. Also make them depend on a new feature flag
> X86_FEATURE_CLEAR_CPU_BUF.

The macros don't depend on the feature flag - VERW patching is done
based on it.

> @@ -20,3 +23,17 @@ SYM_FUNC_END(entry_ibpb)
>  EXPORT_SYMBOL_GPL(entry_ibpb);
>  
>  .popsection
> +
> +.pushsection .entry.text, "ax"
> +
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_START_NOALIGN(mds_verw_sel)

That weird thing needs a comment explaining what it is for.

> +	UNWIND_HINT_UNDEFINED
> +	ANNOTATE_NOENDBR
> +	.word __KERNEL_DS
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_END(mds_verw_sel);
> +/* For KVM */
> +EXPORT_SYMBOL_GPL(mds_verw_sel);
> +
> +.popsection
> diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
> index 58cb9495e40f..f21fc0f12737 100644
> --- a/arch/x86/include/asm/cpufeatures.h
> +++ b/arch/x86/include/asm/cpufeatures.h
> @@ -308,10 +308,10 @@
>  #define X86_FEATURE_SMBA		(11*32+21) /* "" Slow Memory Bandwidth Allocation */
>  #define X86_FEATURE_BMEC		(11*32+22) /* "" Bandwidth Monitoring Event Configuration */
>  #define X86_FEATURE_USER_SHSTK		(11*32+23) /* Shadow stack support for user mode applications */
> -
>  #define X86_FEATURE_SRSO		(11*32+24) /* "" AMD BTB untrain RETs */
>  #define X86_FEATURE_SRSO_ALIAS		(11*32+25) /* "" AMD BTB untrain RETs through aliasing */
>  #define X86_FEATURE_IBPB_ON_VMEXIT	(11*32+26) /* "" Issue an IBPB only on VMEXIT */
> +#define X86_FEATURE_CLEAR_CPU_BUF	(11*32+27) /* "" Clear CPU buffers */

									   ... using VERW

>  
>  /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
>  #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
> diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
> index c55cc243592e..005e69f93115 100644
> --- a/arch/x86/include/asm/nospec-branch.h
> +++ b/arch/x86/include/asm/nospec-branch.h
> @@ -329,6 +329,21 @@
>  #endif
>  .endm
>  
> +/*
> + * Macros to execute VERW instruction that mitigate transient data sampling
> + * attacks such as MDS. On affected systems a microcode update overloaded VERW
> + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
> + *
> + * Note: Only the memory operand variant of VERW clears the CPU buffers.
> + */
> +.macro EXEC_VERW
> +	verw _ASM_RIP(mds_verw_sel)
> +.endm
> +
> +.macro CLEAR_CPU_BUFFERS
> +	ALTERNATIVE "", __stringify(EXEC_VERW), X86_FEATURE_CLEAR_CPU_BUF
> +.endm

Why can't this simply be:

.macro CLEAR_CPU_BUFFERS
        ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
.endm

without that silly EXEC_VERW macro?
  
Pawan Gupta Nov. 2, 2023, 12:01 a.m. UTC | #2
On Fri, Oct 27, 2023 at 05:32:03PM +0200, Borislav Petkov wrote:
> On Fri, Oct 27, 2023 at 07:38:40AM -0700, Pawan Gupta wrote:
> >   1. Kernel data accessed by an NMI between VERW and return-to-user can
> >      remain in CPU buffers ( since NMI returning to kernel does not
> 
> Some leftover '('

Ok.

> > In preparation for moving VERW to entry/exit asm code, create macros
> > that can be used in asm. Also make them depend on a new feature flag
> > X86_FEATURE_CLEAR_CPU_BUF.
> 
> The macros don't depend on the feature flag - VERW patching is done
> based on it.

Will fix.

> > @@ -20,3 +23,17 @@ SYM_FUNC_END(entry_ibpb)
> >  EXPORT_SYMBOL_GPL(entry_ibpb);
> >  
> >  .popsection
> > +
> > +.pushsection .entry.text, "ax"
> > +
> > +.align L1_CACHE_BYTES, 0xcc
> > +SYM_CODE_START_NOALIGN(mds_verw_sel)
> 
> That weird thing needs a comment explaining what it is for.

Right.

> > +#define X86_FEATURE_CLEAR_CPU_BUF	(11*32+27) /* "" Clear CPU buffers */
> 
> 									   ... using VERW

Ok.

> > +/*
> > + * Macros to execute VERW instruction that mitigate transient data sampling
> > + * attacks such as MDS. On affected systems a microcode update overloaded VERW
> > + * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
> > + *
> > + * Note: Only the memory operand variant of VERW clears the CPU buffers.
> > + */
> > +.macro EXEC_VERW
> > +	verw _ASM_RIP(mds_verw_sel)
> > +.endm
> > +
> > +.macro CLEAR_CPU_BUFFERS
> > +	ALTERNATIVE "", __stringify(EXEC_VERW), X86_FEATURE_CLEAR_CPU_BUF
> > +.endm
> 
> Why can't this simply be:
> 
> .macro CLEAR_CPU_BUFFERS
>         ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF

This will not work in 32-bit mode that uses the same macro.

Thanks for the review.
  
Josh Poimboeuf Dec. 1, 2023, 7:36 p.m. UTC | #3
On Fri, Oct 27, 2023 at 07:38:40AM -0700, Pawan Gupta wrote:
> +.pushsection .entry.text, "ax"
> +
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_START_NOALIGN(mds_verw_sel)
> +	UNWIND_HINT_UNDEFINED
> +	ANNOTATE_NOENDBR
> +	.word __KERNEL_DS
> +.align L1_CACHE_BYTES, 0xcc
> +SYM_CODE_END(mds_verw_sel);
> +/* For KVM */
> +EXPORT_SYMBOL_GPL(mds_verw_sel);
> +
> +.popsection

This is data, so why is it "CODE" in .entry.text?
  
Andrew Cooper Dec. 1, 2023, 7:39 p.m. UTC | #4
On 01/12/2023 7:36 pm, Josh Poimboeuf wrote:
> On Fri, Oct 27, 2023 at 07:38:40AM -0700, Pawan Gupta wrote:
>> +.pushsection .entry.text, "ax"
>> +
>> +.align L1_CACHE_BYTES, 0xcc
>> +SYM_CODE_START_NOALIGN(mds_verw_sel)
>> +	UNWIND_HINT_UNDEFINED
>> +	ANNOTATE_NOENDBR
>> +	.word __KERNEL_DS
>> +.align L1_CACHE_BYTES, 0xcc
>> +SYM_CODE_END(mds_verw_sel);
>> +/* For KVM */
>> +EXPORT_SYMBOL_GPL(mds_verw_sel);
>> +
>> +.popsection
> This is data, so why is it "CODE" in .entry.text?

Because KPTI.

~Andrew
  
Josh Poimboeuf Dec. 1, 2023, 8:04 p.m. UTC | #5
On Fri, Dec 01, 2023 at 07:39:05PM +0000, Andrew Cooper wrote:
> On 01/12/2023 7:36 pm, Josh Poimboeuf wrote:
> > On Fri, Oct 27, 2023 at 07:38:40AM -0700, Pawan Gupta wrote:
> >> +.pushsection .entry.text, "ax"
> >> +
> >> +.align L1_CACHE_BYTES, 0xcc
> >> +SYM_CODE_START_NOALIGN(mds_verw_sel)
> >> +	UNWIND_HINT_UNDEFINED
> >> +	ANNOTATE_NOENDBR
> >> +	.word __KERNEL_DS
> >> +.align L1_CACHE_BYTES, 0xcc
> >> +SYM_CODE_END(mds_verw_sel);
> >> +/* For KVM */
> >> +EXPORT_SYMBOL_GPL(mds_verw_sel);
> >> +
> >> +.popsection
> > This is data, so why is it "CODE" in .entry.text?
> 
> Because KPTI.

Urgh... Pawan please add a comment.
  
Pawan Gupta Dec. 20, 2023, 1:15 a.m. UTC | #6
On Fri, Dec 01, 2023 at 12:04:42PM -0800, Josh Poimboeuf wrote:
> On Fri, Dec 01, 2023 at 07:39:05PM +0000, Andrew Cooper wrote:
> > On 01/12/2023 7:36 pm, Josh Poimboeuf wrote:
> > > On Fri, Oct 27, 2023 at 07:38:40AM -0700, Pawan Gupta wrote:
> > >> +.pushsection .entry.text, "ax"
> > >> +
> > >> +.align L1_CACHE_BYTES, 0xcc
> > >> +SYM_CODE_START_NOALIGN(mds_verw_sel)
> > >> +	UNWIND_HINT_UNDEFINED
> > >> +	ANNOTATE_NOENDBR
> > >> +	.word __KERNEL_DS
> > >> +.align L1_CACHE_BYTES, 0xcc
> > >> +SYM_CODE_END(mds_verw_sel);
> > >> +/* For KVM */
> > >> +EXPORT_SYMBOL_GPL(mds_verw_sel);
> > >> +
> > >> +.popsection
> > > This is data, so why is it "CODE" in .entry.text?
> > 
> > Because KPTI.
> 
> Urgh... Pawan please add a comment.

Yes, this place needs a comment, will add.
  

Patch

diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index bfb7bcb362bc..8dc84bb9dc0b 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@ 
 #include <linux/linkage.h>
 #include <asm/export.h>
 #include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
 
 .pushsection .noinstr.text, "ax"
 
@@ -20,3 +23,17 @@  SYM_FUNC_END(entry_ibpb)
 EXPORT_SYMBOL_GPL(entry_ibpb);
 
 .popsection
+
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+	UNWIND_HINT_UNDEFINED
+	ANNOTATE_NOENDBR
+	.word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 58cb9495e40f..f21fc0f12737 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -308,10 +308,10 @@ 
 #define X86_FEATURE_SMBA		(11*32+21) /* "" Slow Memory Bandwidth Allocation */
 #define X86_FEATURE_BMEC		(11*32+22) /* "" Bandwidth Monitoring Event Configuration */
 #define X86_FEATURE_USER_SHSTK		(11*32+23) /* Shadow stack support for user mode applications */
-
 #define X86_FEATURE_SRSO		(11*32+24) /* "" AMD BTB untrain RETs */
 #define X86_FEATURE_SRSO_ALIAS		(11*32+25) /* "" AMD BTB untrain RETs through aliasing */
 #define X86_FEATURE_IBPB_ON_VMEXIT	(11*32+26) /* "" Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_CLEAR_CPU_BUF	(11*32+27) /* "" Clear CPU buffers */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c55cc243592e..005e69f93115 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -329,6 +329,21 @@ 
 #endif
 .endm
 
+/*
+ * Macros to execute the VERW instruction, which mitigates transient data
+ * sampling attacks such as MDS. On affected systems a microcode update
+ * overloaded VERW to also clear the CPU buffers. VERW clobbers EFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro EXEC_VERW
+	verw _ASM_RIP(mds_verw_sel)
+.endm
+
+.macro CLEAR_CPU_BUFFERS
+	ALTERNATIVE "", __stringify(EXEC_VERW), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE					\