[RFC,2/2] x86/kprobes: Prohibit probing on compiler generated CFI checking code

Message ID 168899127520.80889.15418363018799407058.stgit@devnote2
State New
Series x86: kprobes: Fix CFI_CLANG related issues

Commit Message

Masami Hiramatsu (Google) July 10, 2023, 12:14 p.m. UTC
  From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Prohibit probing on the compiler-generated CFI typeid checking code
because it is used for decoding the typeid when a CFI error happens.

The compiler generates the following instruction sequence for indirect
call checks on x86;

   movl    -<id>, %r10d       ; 6 bytes
   addl    -4(%reg), %r10d    ; 4 bytes
   je      .Ltmp1             ; 2 bytes
   ud2                        ; <- regs->ip

And handle_cfi_failure() decodes these instructions (movl and addl) to
get the typeid and the target address. Thus, if we put a kprobe on one
of those instructions, the decode will fail and a wrong typeid and
target address will be reported.
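
For reference, the decode works roughly like the sketch below
(simplified, with made-up names; the real code lives in
arch/x86/kernel/cfi.c and additionally recovers the call target from
the addl's ModRM register):

   /* Sketch only (needs <asm/insn.h> and <asm/ptrace.h>): extract the
    * expected typeid from the movl that sits 12 bytes before the ud2
    * at regs->ip. */
   static bool decode_cfi_typeid(struct pt_regs *regs, u32 *type)
   {
   	struct insn insn;

   	if (insn_decode_kernel(&insn, (void *)(regs->ip - 12)) < 0)
   		return false;
   	if (insn.opcode.value != 0xBA)	/* movl $imm32, %r10d */
   		return false;

   	/* The movl immediate is the negated expected typeid. */
   	*type = -(u32)insn.immediate.value;

   	/* The addl -4(%reg), %r10d at regs->ip - 6 (opcode 0x03) is
   	 * decoded the same way; its ModRM register names the checked
   	 * call target. */
   	return true;
   }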


Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 arch/x86/kernel/kprobes/core.c |   34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
  

Comments

Peter Zijlstra July 10, 2023, 4:16 p.m. UTC | #1
On Mon, Jul 10, 2023 at 09:14:35PM +0900, Masami Hiramatsu (Google) wrote:
> +	if (IS_ENABLED(CONFIG_CFI_CLANG)) {
> +		/*
> +		 * The compiler generates the following instruction sequence
> +		 * for indirect call checks and cfi.c decodes this;
> +		 *
> +		 *   movl    -<id>, %r10d       ; 6 bytes
> +		 *   addl    -4(%reg), %r10d    ; 4 bytes
> +		 *   je      .Ltmp1             ; 2 bytes
> +		 *   ud2                        ; <- regs->ip
> +		 *   .Ltmp1:
> +		 *
> +		 * Also, these movl and addl are used for showing expected
> +		 * type. So those must not be touched.
> +		 */
> +		__addr = recover_probed_instruction(buf, addr);
> +		if (!__addr)
> +			return 0;
> +
> +		if (insn_decode_kernel(&insn, (void *)__addr) < 0)
> +			return 0;
> +
> +		if (insn.opcode.value == 0xBA)
> +			offset = 12;
> +		else if (insn.opcode.value == 0x3)
> +			offset = 6;
> +		else
> +			goto out;

Notably, the JE will already be avoided?

> +
> +		/* This movl/addl is used for decoding CFI. */
> +		if (is_cfi_trap(addr + offset))
> +			return 0;
> +	}
>  
> +out:
>  	return (addr == paddr);
>  }

Hmm, so I was thinking something like the below, which also catches
things when we rewrite kCFI to FineIBT, except I don't think we care if
the FineIBT callsite gets re-written. FineIBT only relies on the __cfi_
symbol not getting poked at (which the previous patches should ensure).

Additionally, is_cfi_trap() is horrifically slow -- it's a full linear
search of the entire kcfi_traps array with no smarts, since #UD can
hardly be considered a fast path.

So I think I'm ok with the above, just adding the below for reference
(completely untested and everything).


Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>


---
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f7f6042eb7e6..b812dee76909 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -293,6 +293,11 @@ static int can_probe(unsigned long paddr)
 #endif
 		addr += insn.length;
 	}
+	/*
+	 * Don't allow poking the kCFI/FineIBT callsites.
+	 */
+	if (IS_ENABLED(CONFIG_CFI_CLANG) && cfi_call_site(addr))
+		return 0;
 
 	return (addr == paddr);
 }
diff --git a/kernel/cfi.c b/kernel/cfi.c
index 08caad776717..2656e6ffa013 100644
--- a/kernel/cfi.c
+++ b/kernel/cfi.c
@@ -31,16 +31,22 @@ static inline unsigned long trap_address(s32 *p)
 	return (unsigned long)((long)p + (long)*p);
 }
 
-static bool is_trap(unsigned long addr, s32 *start, s32 *end)
+static long cfi_trap_distance(unsigned long addr, s32 *start, s32 *end)
 {
+	long dist = LONG_MAX;
 	s32 *p;
 
 	for (p = start; p < end; ++p) {
-		if (trap_address(p) == addr)
-			return true;
+		long d = trap_address(p) - addr;
+
+		if (abs(dist) < abs(d)) {
+			dist = d;
+			if (dist == 0)
+				return 0;
+		}
 	}
 
-	return false;
+	return dist;
 }
 
 #ifdef CONFIG_MODULES
@@ -66,25 +72,25 @@ void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
 	}
 }
 
-static bool is_module_cfi_trap(unsigned long addr)
+static long module_cfi_trap_distance(unsigned long addr)
 {
 	struct module *mod;
-	bool found = false;
+	long dist = LONG_MAX;
 
 	rcu_read_lock_sched_notrace();
 
 	mod = __module_address(addr);
 	if (mod)
-		found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end);
+		dist = cfi_trap_distance(addr, mod->kcfi_traps, mod->kcfi_traps_end);
 
 	rcu_read_unlock_sched_notrace();
 
 	return dist;
 }
 #else /* CONFIG_MODULES */
-static inline bool is_module_cfi_trap(unsigned long addr)
+static long module_cfi_trap_distance(unsigned long addr)
 {
-	return false;
+	return LONG_MAX;
 }
 #endif /* CONFIG_MODULES */
 
@@ -93,9 +99,24 @@ extern s32 __stop___kcfi_traps[];
 
 bool is_cfi_trap(unsigned long addr)
 {
-	if (is_trap(addr, __start___kcfi_traps, __stop___kcfi_traps))
+	long dist = cfi_trap_distance(addr, __start___kcfi_traps, __stop___kcfi_traps);
+	if (!dist)
+		return true;
+
+	return module_cfi_trap_distance(addr) == 0;
+}
+
+bool cfi_call_site(unsigned long addr)
+{
+	long dist = cfi_trap_distance(addr, __start___kcfi_traps, __stop___kcfi_traps);
+	if (dist >= -12 && dist <= 0)
+		return true;
+
+	dist = module_cfi_trap_distance(addr);
+	if (dist >= -12 && dist <= 0)
 		return true;
 
-	return is_module_cfi_trap(addr);
+	return false;
 }
+
 #endif /* CONFIG_ARCH_USES_CFI_TRAPS */
  
Peter Zijlstra July 10, 2023, 4:21 p.m. UTC | #2
On Mon, Jul 10, 2023 at 06:16:43PM +0200, Peter Zijlstra wrote:

> diff --git a/kernel/cfi.c b/kernel/cfi.c
> index 08caad776717..2656e6ffa013 100644
> --- a/kernel/cfi.c
> +++ b/kernel/cfi.c
> @@ -31,16 +31,22 @@ static inline unsigned long trap_address(s32 *p)
>  	return (unsigned long)((long)p + (long)*p);
>  }
>  
> -static bool is_trap(unsigned long addr, s32 *start, s32 *end)
> +static long cfi_trap_distance(unsigned long addr, s32 *start, s32 *end)
>  {
> +	long dist = LONG_MAX;
>  	s32 *p;
>  
>  	for (p = start; p < end; ++p) {
> -		if (trap_address(p) == addr)
> -			return true;
> +		long d = trap_address(p) - addr;
> +
> +		if (abs(dist) < abs(d)) {

Not that I expect anybody will care, but that should obviously be:

		abs(d) < abs(dist)

> +			dist = d;
> +			if (dist == 0)
> +				return 0;
> +		}
>  	}
>  
> -	return false;
> +	return dist;
>  }
  
Masami Hiramatsu (Google) July 10, 2023, 11:58 p.m. UTC | #3
On Mon, 10 Jul 2023 18:16:43 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Mon, Jul 10, 2023 at 09:14:35PM +0900, Masami Hiramatsu (Google) wrote:
> > +	if (IS_ENABLED(CONFIG_CFI_CLANG)) {
> > +		/*
> > +		 * The compiler generates the following instruction sequence
> > +		 * for indirect call checks and cfi.c decodes this;
> > +		 *
> > +		 *   movl    -<id>, %r10d       ; 6 bytes
> > +		 *   addl    -4(%reg), %r10d    ; 4 bytes
> > +		 *   je      .Ltmp1             ; 2 bytes
> > +		 *   ud2                        ; <- regs->ip
> > +		 *   .Ltmp1:
> > +		 *
> > +		 * Also, these movl and addl are used for showing expected
> > +		 * type. So those must not be touched.
> > +		 */
> > +		__addr = recover_probed_instruction(buf, addr);
> > +		if (!__addr)
> > +			return 0;
> > +
> > +		if (insn_decode_kernel(&insn, (void *)__addr) < 0)
> > +			return 0;
> > +
> > +		if (insn.opcode.value == 0xBA)
> > +			offset = 12;
> > +		else if (insn.opcode.value == 0x3)
> > +			offset = 6;
> > +		else
> > +			goto out;
> 
> Notably, the JE will already be avoided?
> 
> > +
> > +		/* This movl/addl is used for decoding CFI. */
> > +		if (is_cfi_trap(addr + offset))
> > +			return 0;
> > +	}
> >  
> > +out:
> >  	return (addr == paddr);
> >  }
> 
> Hmm, so I was thinking something like the below, which also catches
> things when we rewrite kCFI to FineIBT, except I don't think we care if
> the FineIBT callsite gets re-written. FineIBT only relies on the __cfi_
> symbol not getting poked at (which the previous patches should ensure).

Oh, is FineIBT different from kCFI? I thought those were the same. But anyway,
for the kCFI=y && FineIBT=n case, I think this code is still needed.

> 
> Additionally, is_cfi_trap() is horrifically slow -- it's a full linear
> search of the entire kcfi_traps array with no smarts, since #UD can
> hardly be considered a fast path.

Yeah, register_kprobe() is not a fast path in most cases, so I think
this is OK at this point. We can speed it up later by sorting the array
by address and doing a binary search.
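
Something like the below (completely untested sketch, names made up for
illustration) would do for the lookup side; the harder part is the sort
itself, because kcfi_traps stores self-relative offsets, so swapping two
entries has to fix up the offsets as they move, like the exception table
sort does.

	/* Assumes the entries were pre-sorted so trap_address() is ascending. */
	static bool is_cfi_trap_sorted(unsigned long addr, s32 *start, s32 *end)
	{
		s32 *lo = start, *hi = end;

		while (lo < hi) {
			s32 *mid = lo + (hi - lo) / 2;
			unsigned long taddr = trap_address(mid);

			if (taddr == addr)
				return true;
			if (taddr < addr)
				lo = mid + 1;
			else
				hi = mid;
		}
		return false;
	}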

> 
> So I think I'm ok with the above, just adding the below for reference
> (completely untested and everything).

I wonder whether the distance check can be used outside of x86. Does the
CFI implementation depend on the arch?

> 
> 
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Thanks!

> 
> 
> ---
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index f7f6042eb7e6..b812dee76909 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -293,6 +293,11 @@ static int can_probe(unsigned long paddr)
>  #endif
>  		addr += insn.length;
>  	}
> +	/*
> +	 * Don't allow poking the kCFI/FineIBT callsites.
> +	 */
> +	if (IS_ENABLED(CONFIG_CFI_CLANG) && cfi_call_site(addr))
> +		return 0;
>  
>  	return (addr == paddr);
>  }
> diff --git a/kernel/cfi.c b/kernel/cfi.c
> index 08caad776717..2656e6ffa013 100644
> --- a/kernel/cfi.c
> +++ b/kernel/cfi.c
> @@ -31,16 +31,22 @@ static inline unsigned long trap_address(s32 *p)
>  	return (unsigned long)((long)p + (long)*p);
>  }
>  
> -static bool is_trap(unsigned long addr, s32 *start, s32 *end)
> +static long cfi_trap_distance(unsigned long addr, s32 *start, s32 *end)
>  {
> +	long dist = LONG_MAX;
>  	s32 *p;
>  
>  	for (p = start; p < end; ++p) {
> -		if (trap_address(p) == addr)
> -			return true;
> +		long d = trap_address(p) - addr;
> +
> +		if (abs(dist) < abs(d)) {
> +			dist = d;
> +			if (dist == 0)
> +				return 0;
> +		}
>  	}
>  
> -	return false;
> +	return dist;
>  }
>  
>  #ifdef CONFIG_MODULES
> @@ -66,25 +72,25 @@ void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
>  	}
>  }
>  
> -static bool is_module_cfi_trap(unsigned long addr)
> +static long module_cfi_trap_distance(unsigned long addr)
>  {
>  	struct module *mod;
> -	bool found = false;
> +	long dist = LONG_MAX;
>  
>  	rcu_read_lock_sched_notrace();
>  
>  	mod = __module_address(addr);
>  	if (mod)
> -		found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end);
> +		dist = cfi_trap_distance(addr, mod->kcfi_traps, mod->kcfi_traps_end);
>  
>  	rcu_read_unlock_sched_notrace();
>  
>  	return dist;
>  }
>  #else /* CONFIG_MODULES */
> -static inline bool is_module_cfi_trap(unsigned long addr)
> +static long module_cfi_trap_distance(unsigned long addr)
>  {
> -	return false;
> +	return LONG_MAX;
>  }
>  #endif /* CONFIG_MODULES */
>  
> @@ -93,9 +99,24 @@ extern s32 __stop___kcfi_traps[];
>  
>  bool is_cfi_trap(unsigned long addr)
>  {
> -	if (is_trap(addr, __start___kcfi_traps, __stop___kcfi_traps))
> +	long dist = cfi_trap_distance(addr, __start___kcfi_traps, __stop___kcfi_traps);
> +	if (!dist)
> +		return true;
> +
> +	return module_cfi_trap_distance(addr) == 0;
> +}
> +
> +bool cfi_call_site(unsigned long addr)
> +{
> +	long dist = cfi_trap_distance(addr, __start___kcfi_traps, __stop___kcfi_traps);
> +	if (dist >= -12 && dist <= 0)
> +		return true;
> +
> +	dist = module_cfi_trap_distance(addr);
> +	if (dist >= -12 && dist <= 0)
>  		return true;
>  
> -	return is_module_cfi_trap(addr);
> +	return false;
>  }
> +
>  #endif /* CONFIG_ARCH_USES_CFI_TRAPS */
  
Peter Zijlstra July 11, 2023, 7:04 a.m. UTC | #4
On Tue, Jul 11, 2023 at 08:58:37AM +0900, Masami Hiramatsu wrote:

> Oh, is FineIBT different from kCFI? I thought those were the same. But anyway,
> for the kCFI=y && FineIBT=n case, I think this code is still needed.

Yeah, FineIBT relies on kCFI and IBT (and selects CALL_PADDING) and
dynamically re-writes the kCFI infrastructure.

All the gory details are in arch/x86/kernel/alternative.c, search for
CONFIG_FINEIBT. I've tried to write a big comment, but please let me
know if something isn't clear and I'll write some actual words :-).

But basically kCFI is a pure software solution and does the check before
the indirect control transfer (it has to), while FineIBT relies on the
IBT hardware in order to do the check after.

As such, FineIBT can do the CFI check without memory loads, which is
good for performance. Another useful property of FineIBT is that it is a
speculation barrier.
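
Roughly, and glossing over the exact byte counts and the call padding
(the precise sequences are in that alternative.c comment), the two look
like:

   kCFI, check at the caller before the transfer:

	movl    $-<id>, %r10d
	addl    -4(%r11), %r10d		; <id> is stored just before the target
	je      1f
	ud2
   1:	call    *%r11

   FineIBT, check at the callee after ENDBR (only patched in when the
   hardware has IBT):

   caller:
	movl    $<id>, %r10d		; just the hash, no memory load
	call    *%r11			; target adjusted to land on the preamble

   __cfi_<sym>:
	endbr64
	subl    $<id>, %r10d
	je      <sym>
	ud2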
  
Peter Zijlstra July 11, 2023, 7:15 a.m. UTC | #5
On Tue, Jul 11, 2023 at 08:58:37AM +0900, Masami Hiramatsu wrote:

> > Hmm, so I was thinking something like the below, which also catches
> > things when we rewrite kCFI to FineIBT, except I don't think we care if
> > the FineIBT callsite gets re-written. FineIBT only relies on the __cfi_
> > symbol not getting poked at (which the previous patches should ensure).
> 
> > Oh, is FineIBT different from kCFI? I thought those were the same. But anyway,
> > for the kCFI=y && FineIBT=n case, I think this code is still needed.

Ah, I forgot to answer your other question; FineIBT is dynamically
patched since it relies on optional hardware features: only if the
hardware has IBT support can FineIBT work.

So yeah, your check is definitely needed. And I think it'll 'gracefully'
fail when FineIBT is patched in because the instructions aren't exactly
the same.

And since FineIBT has most of the magic in the __cfi_ prefix, which
isn't patched per the previous patch, I think we're good on the call-site.

> > So I think I'm ok with the above, just adding the below for reference
> > (completely untested and everything).
> 
> I wonder whether the distance check can be used outside of x86. Does the
> CFI implementation depend on the arch?

Currently only arm64 and x86_64 have CFI, and while the particulars are a
little different, they do have a lot in common, including the reporting.
IIRC the arm64 variant is a little simpler because of the fixed
instruction size; there is no worry that someone will jump into the
middle of an instruction stream.
  

Patch

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index f7f6042eb7e6..fa8c2b41cbaf 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -54,6 +54,7 @@ 
 #include <asm/insn.h>
 #include <asm/debugreg.h>
 #include <asm/ibt.h>
+#include <asm/cfi.h>
 
 #include "common.h"
 
@@ -293,7 +294,40 @@  static int can_probe(unsigned long paddr)
 #endif
 		addr += insn.length;
 	}
+	if (IS_ENABLED(CONFIG_CFI_CLANG)) {
+		/*
+		 * The compiler generates the following instruction sequence
+		 * for indirect call checks and cfi.c decodes this;
+		 *
+		 *   movl    -<id>, %r10d       ; 6 bytes
+		 *   addl    -4(%reg), %r10d    ; 4 bytes
+		 *   je      .Ltmp1             ; 2 bytes
+		 *   ud2                        ; <- regs->ip
+		 *   .Ltmp1:
+		 *
+		 * Also, these movl and addl are used for showing expected
+		 * type. So those must not be touched.
+		 */
+		__addr = recover_probed_instruction(buf, addr);
+		if (!__addr)
+			return 0;
+
+		if (insn_decode_kernel(&insn, (void *)__addr) < 0)
+			return 0;
+
+		if (insn.opcode.value == 0xBA)
+			offset = 12;
+		else if (insn.opcode.value == 0x3)
+			offset = 6;
+		else
+			goto out;
+
+		/* This movl/addl is used for decoding CFI. */
+		if (is_cfi_trap(addr + offset))
+			return 0;
+	}
 
+out:
 	return (addr == paddr);
 }