[2/3] x86/entry: Disable IA32 syscalls in the presence of ia32_disabled

Message ID 20230607072936.3766231-3-nik.borisov@suse.com
State New
Headers
Series Add ability to disable ia32 at boot time |

Commit Message

Nikolay Borisov June 7, 2023, 7:29 a.m. UTC
  First stage of disabling ia32 compat layer is to disable 32bit syscall
entry points. Legacy int 0x80 vector is disabled by setting its gate
descriptor to "not present" and the sysenter vector is disabled by
re-using the existing code in case IA32_EMULATION is disabled.

Signed-off-by: Nikolay Borisov <nik.borisov@suse.com>
---
 arch/x86/entry/entry_64.S    |  2 --
 arch/x86/include/asm/desc.h  |  5 +++++
 arch/x86/kernel/cpu/common.c | 29 ++++++++++++++++++-----------
 3 files changed, 23 insertions(+), 13 deletions(-)
  

Comments

Thomas Gleixner June 7, 2023, 9:11 a.m. UTC | #1
On Wed, Jun 07 2023 at 10:29, Nikolay Borisov wrote:
> diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> index ab97b22ac04a..618b428586d1 100644
> --- a/arch/x86/include/asm/desc.h
> +++ b/arch/x86/include/asm/desc.h
> @@ -8,6 +8,7 @@
>  #include <asm/fixmap.h>
>  #include <asm/irq_vectors.h>
>  #include <asm/cpu_entry_area.h>
> +#include <asm/traps.h>
>  
>  #include <linux/debug_locks.h>
>  #include <linux/smp.h>
> @@ -429,6 +430,10 @@ static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
>  	gate->offset_high	= (u32) (addr >> 32);
>  	gate->reserved		= 0;
>  #endif
> +#ifdef CONFIG_IA32_EMULATION
> +	if (ia32_disabled && d->vector == IA32_SYSCALL_VECTOR)
> +		gate->bits.p = 0;
> +#endif

Why installing the IDT vector in the first place? This is completely
inconsistent with the CONFIG_IA32_EMULATION=n behaviour.

Just slapping this conditional into some random place is really not
cutting it.

The obvious solution is to remove IA32_SYSCALL_VECTOR from def_idts[]
and handle it separately.

>  extern unsigned long system_vectors[];
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 80710a68ef7d..71f8b55f70c9 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -2054,17 +2054,24 @@ void syscall_init(void)
>  	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
>  
>  #ifdef CONFIG_IA32_EMULATION
> -	wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
> -	/*
> -	 * This only works on Intel CPUs.
> -	 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
> -	 * This does not cause SYSENTER to jump to the wrong location, because
> -	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
> -	 */
> -	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
> -	wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
> -		    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
> -	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
> +	if (ia32_disabled) {
> +		wrmsrl_cstar((unsigned long)ignore_sysret);
> +		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
> +		wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
> +		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
> +	} else {
> +		wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
> +		/*
> +		 * This only works on Intel CPUs.
> +		 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
> +		 * This does not cause SYSENTER to jump to the wrong location, because
> +		 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
> +		 */
> +		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
> +		wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
> +			    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
> +		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
> +	}
>  #else
>  	wrmsrl_cstar((unsigned long)ignore_sysret);
>  	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);

So this ends up with two copies of the same code for invalidating
compat. Why?

   if (IS_ENABLED(CONFIG_IA32_EMULATION) && !ia32_disabled)) {
	wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
        ...
   } else {
	wrmsrl_cstar((unsigned long)ignore_sysret);
        ...
   }

All it requires is

#ifdef CONFIG_IA32_EMULATION
void entry_SYSCALL_compat(void);
#else
#define entry_SYSCALL_compat NULL
#endif

in the header which declares entry_SYSCALL_compat.

No?

Thanks,

        tglx
  
Brian Gerst June 8, 2023, 3:18 a.m. UTC | #2
On Wed, Jun 7, 2023 at 5:23 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Wed, Jun 07 2023 at 10:29, Nikolay Borisov wrote:
> > diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
> > index ab97b22ac04a..618b428586d1 100644
> > --- a/arch/x86/include/asm/desc.h
> > +++ b/arch/x86/include/asm/desc.h
> > @@ -8,6 +8,7 @@
> >  #include <asm/fixmap.h>
> >  #include <asm/irq_vectors.h>
> >  #include <asm/cpu_entry_area.h>
> > +#include <asm/traps.h>
> >
> >  #include <linux/debug_locks.h>
> >  #include <linux/smp.h>
> > @@ -429,6 +430,10 @@ static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
> >       gate->offset_high       = (u32) (addr >> 32);
> >       gate->reserved          = 0;
> >  #endif
> > +#ifdef CONFIG_IA32_EMULATION
> > +     if (ia32_disabled && d->vector == IA32_SYSCALL_VECTOR)
> > +             gate->bits.p = 0;
> > +#endif
>
> Why installing the IDT vector in the first place? This is completely
> inconsistent with the CONFIG_IA32_EMULATION=n behaviour.
>
> Just slapping this conditional into some random place is really not
> cutting it.
>
> The obvious solution is to remove IA32_SYSCALL_VECTOR from def_idts[]
> and handle it separately.
>
> >  extern unsigned long system_vectors[];
> > diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> > index 80710a68ef7d..71f8b55f70c9 100644
> > --- a/arch/x86/kernel/cpu/common.c
> > +++ b/arch/x86/kernel/cpu/common.c
> > @@ -2054,17 +2054,24 @@ void syscall_init(void)
> >       wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
> >
> >  #ifdef CONFIG_IA32_EMULATION
> > -     wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
> > -     /*
> > -      * This only works on Intel CPUs.
> > -      * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
> > -      * This does not cause SYSENTER to jump to the wrong location, because
> > -      * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
> > -      */
> > -     wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
> > -     wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
> > -                 (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
> > -     wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
> > +     if (ia32_disabled) {
> > +             wrmsrl_cstar((unsigned long)ignore_sysret);
> > +             wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
> > +             wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
> > +             wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
> > +     } else {
> > +             wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
> > +             /*
> > +              * This only works on Intel CPUs.
> > +              * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
> > +              * This does not cause SYSENTER to jump to the wrong location, because
> > +              * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
> > +              */
> > +             wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
> > +             wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
> > +                         (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
> > +             wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
> > +     }
> >  #else
> >       wrmsrl_cstar((unsigned long)ignore_sysret);
> >       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
>
> So this ends up with two copies of the same code for invalidating
> compat. Why?
>
>    if (IS_ENABLED(CONFIG_IA32_EMULATION) && !ia32_disabled)) {
>         wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
>         ...
>    } else {
>         wrmsrl_cstar((unsigned long)ignore_sysret);
>         ...
>    }
>
> All it requires is
>
> #ifdef CONFIG_IA32_EMULATION
> void entry_SYSCALL_compat(void);
> #else
> #define entry_SYSCALL_compat NULL
> #endif
>
> in the header which declares entry_SYSCALL_compat.
>
> No?

SYSCALL from 32-bit mode can't be disabled like that.  That's why
ignore_sysret() exists for the !CONFIG_IA32_EMULATION case.

Brian Gerst
  

Patch

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f31e286c2977..5e0e8a5e05ca 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1514,7 +1514,6 @@  SYM_CODE_START(asm_exc_nmi)
 	iretq
 SYM_CODE_END(asm_exc_nmi)
 
-#ifndef CONFIG_IA32_EMULATION
 /*
  * This handles SYSCALL from 32-bit code.  There is no way to program
  * MSRs to fully disable 32-bit SYSCALL.
@@ -1525,7 +1524,6 @@  SYM_CODE_START(ignore_sysret)
 	mov	$-ENOSYS, %eax
 	sysretl
 SYM_CODE_END(ignore_sysret)
-#endif
 
 .pushsection .text, "ax"
 	__FUNC_ALIGN
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ab97b22ac04a..618b428586d1 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -8,6 +8,7 @@ 
 #include <asm/fixmap.h>
 #include <asm/irq_vectors.h>
 #include <asm/cpu_entry_area.h>
+#include <asm/traps.h>
 
 #include <linux/debug_locks.h>
 #include <linux/smp.h>
@@ -429,6 +430,10 @@  static inline void idt_init_desc(gate_desc *gate, const struct idt_data *d)
 	gate->offset_high	= (u32) (addr >> 32);
 	gate->reserved		= 0;
 #endif
+#ifdef CONFIG_IA32_EMULATION
+	if (ia32_disabled && d->vector == IA32_SYSCALL_VECTOR)
+		gate->bits.p = 0;
+#endif
 }
 
 extern unsigned long system_vectors[];
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80710a68ef7d..71f8b55f70c9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2054,17 +2054,24 @@  void syscall_init(void)
 	wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
-	wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
-	/*
-	 * This only works on Intel CPUs.
-	 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
-	 * This does not cause SYSENTER to jump to the wrong location, because
-	 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
-	 */
-	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-	wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
-		    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
-	wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+	if (ia32_disabled) {
+		wrmsrl_cstar((unsigned long)ignore_sysret);
+		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+		wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
+	} else {
+		wrmsrl_cstar((unsigned long)entry_SYSCALL_compat);
+		/*
+		 * This only works on Intel CPUs.
+		 * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
+		 * This does not cause SYSENTER to jump to the wrong location, because
+		 * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+		 */
+		wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+		wrmsrl_safe(MSR_IA32_SYSENTER_ESP,
+			    (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1));
+		wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
+	}
 #else
 	wrmsrl_cstar((unsigned long)ignore_sysret);
 	wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);