[-tip,v2,2/3] x86/callthunks: Handle %rip-relative relocations in call thunk template

Message ID 20231105213731.1878100-3-ubizjak@gmail.com
State New
Headers
Series x86/callthunks: Fix and unify call thunk assembly snippets |

Commit Message

Uros Bizjak Nov. 5, 2023, 9:34 p.m. UTC
  Contrary to alternatives, relocations are currently not supported in
call thunk templates.  Re-use the existing infrastructure from
alternative.c to allow %rip-relative relocations when copying call
thunk template from its storage location.

The patch allows unification of ASM_INCREMENT_CALL_DEPTH, which already
uses PER_CPU_VAR macro, with INCREMENT_CALL_DEPTH, used in call thunk
template, which is currently limited to use absolute address.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
v2: Reuse existing relocation infrastructure from alternative.c.
---
 arch/x86/include/asm/text-patching.h |  2 ++
 arch/x86/kernel/alternative.c        |  3 +--
 arch/x86/kernel/callthunks.c         | 32 ++++++++++++++++++++++------
 3 files changed, 28 insertions(+), 9 deletions(-)
  

Comments

Nathan Chancellor Dec. 1, 2023, 3:54 a.m. UTC | #1
Hi Uros,

On Sun, Nov 05, 2023 at 10:34:36PM +0100, Uros Bizjak wrote:
> Contrary to alternatives, relocations are currently not supported in
> call thunk templates.  Re-use the existing infrastructure from
> alternative.c to allow %rip-relative relocations when copying call
> thunk template from its storage location.
> 
> The patch allows unification of ASM_INCREMENT_CALL_DEPTH, which already
> uses PER_CPU_VAR macro, with INCREMENT_CALL_DEPTH, used in call thunk
> template, which is currently limited to use absolute address.
> 
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Ingo Molnar <mingo@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: "H. Peter Anvin" <hpa@zytor.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
...
> diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
> index e9ad518a5003..ef9c04707b3c 100644
> --- a/arch/x86/kernel/callthunks.c
> +++ b/arch/x86/kernel/callthunks.c
...
> @@ -291,20 +298,27 @@ void *callthunks_translate_call_dest(void *dest)
>  static bool is_callthunk(void *addr)
>  {
>  	unsigned int tmpl_size = SKL_TMPL_SIZE;
> -	void *tmpl = skl_call_thunk_template;
> +	u8 insn_buff[MAX_PATCH_LEN];
>  	unsigned long dest;
> +	u8 *pad;
>  
>  	dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
>  	if (!thunks_initialized || skip_addr((void *)dest))
>  		return false;
>  
> -	return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
> +	*pad = dest - tmpl_size;

Clang warns (or errors with CONFIG_WERROR=y):

  arch/x86/kernel/callthunks.c:315:3: error: variable 'pad' is uninitialized when used here [-Werror,-Wuninitialized]
    315 |         *pad = dest - tmpl_size;
        |          ^~~
  arch/x86/kernel/callthunks.c:309:9: note: initialize the variable 'pad' to silence this warning
    309 |         u8 *pad;
        |                ^
        |                 = NULL
  1 error generated.

which came from our continuous integration:

https://github.com/ClangBuiltLinux/continuous-integration2/actions/runs/7054081453/job/19205345548
https://storage.tuxsuite.com/public/clangbuiltlinux/continuous-integration2/builds/2Yv1FATZZIeD3P7S57ZkHYhyZ8A/build.log

> +
> +	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
> +	apply_relocation(insn_buff, tmpl_size, pad,
> +			 skl_call_thunk_template, tmpl_size);
> +
> +	return !bcmp(pad, insn_buff, tmpl_size);
>  }

Cheers,
Nathan
  
Uros Bizjak Dec. 1, 2023, 7:48 a.m. UTC | #2
On Fri, Dec 1, 2023 at 4:55 AM Nathan Chancellor <nathan@kernel.org> wrote:
>
> Hi Uros,
>
> On Sun, Nov 05, 2023 at 10:34:36PM +0100, Uros Bizjak wrote:
> > Contrary to alternatives, relocations are currently not supported in
> > call thunk templates.  Re-use the existing infrastructure from
> > alternative.c to allow %rip-relative relocations when copying call
> > thunk template from its storage location.
> >
> > The patch allows unification of ASM_INCREMENT_CALL_DEPTH, which already
> > uses PER_CPU_VAR macro, with INCREMENT_CALL_DEPTH, used in call thunk
> > template, which is currently limited to use absolute address.
> >
> > Cc: Thomas Gleixner <tglx@linutronix.de>
> > Cc: Ingo Molnar <mingo@kernel.org>
> > Cc: Borislav Petkov <bp@alien8.de>
> > Cc: Dave Hansen <dave.hansen@linux.intel.com>
> > Cc: "H. Peter Anvin" <hpa@zytor.com>
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
> ...
> > diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
> > index e9ad518a5003..ef9c04707b3c 100644
> > --- a/arch/x86/kernel/callthunks.c
> > +++ b/arch/x86/kernel/callthunks.c
> ...
> > @@ -291,20 +298,27 @@ void *callthunks_translate_call_dest(void *dest)
> >  static bool is_callthunk(void *addr)
> >  {
> >       unsigned int tmpl_size = SKL_TMPL_SIZE;
> > -     void *tmpl = skl_call_thunk_template;
> > +     u8 insn_buff[MAX_PATCH_LEN];
> >       unsigned long dest;
> > +     u8 *pad;
> >
> >       dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
> >       if (!thunks_initialized || skip_addr((void *)dest))
> >               return false;
> >
> > -     return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
> > +     *pad = dest - tmpl_size;
>
> Clang warns (or errors with CONFIG_WERROR=y):

Uh, GCC didn't warn at all (and there is some mixup with types here,
so a thinko slipped through).

The attached patch fixes the oversight. I'll post a formal patch later
today after some more testing.

Thanks,
Uros.
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index f5507c95e7be..71b74a07c8ee 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -306,7 +306,7 @@ static bool is_callthunk(void *addr)
 	if (!thunks_initialized || skip_addr((void *)dest))
 		return false;
 
-	*pad = dest - tmpl_size;
+	pad = (void *)dest - tmpl_size;
 
 	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
 	apply_relocation(insn_buff, tmpl_size, pad,
  

Patch

diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 29832c338cdc..ba8d900f3ebe 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -18,6 +18,8 @@  static inline void apply_paravirt(struct paravirt_patch_site *start,
 #define __parainstructions_end	NULL
 #endif
 
+void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len);
+
 /*
  * Currently, the max observed size in the kernel code is
  * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 73be3931e4f0..66140c54d4f6 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -325,8 +325,7 @@  bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
 	return (target < src || target > src + src_len);
 }
 
-static void __init_or_module noinline
-apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
+void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 {
 	int prev, target = 0;
 
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index e9ad518a5003..ef9c04707b3c 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -24,6 +24,8 @@ 
 
 static int __initdata_or_module debug_callthunks;
 
+#define MAX_PATCH_LEN (255-1)
+
 #define prdbg(fmt, args...)					\
 do {								\
 	if (debug_callthunks)					\
@@ -179,10 +181,15 @@  static const u8 nops[] = {
 static void *patch_dest(void *dest, bool direct)
 {
 	unsigned int tsize = SKL_TMPL_SIZE;
+	u8 insn_buff[MAX_PATCH_LEN];
 	u8 *pad = dest - tsize;
 
+	memcpy(insn_buff, skl_call_thunk_template, tsize);
+	apply_relocation(insn_buff, tsize, pad,
+			 skl_call_thunk_template, tsize);
+
 	/* Already patched? */
-	if (!bcmp(pad, skl_call_thunk_template, tsize))
+	if (!bcmp(pad, insn_buff, tsize))
 		return pad;
 
 	/* Ensure there are nops */
@@ -192,9 +199,9 @@  static void *patch_dest(void *dest, bool direct)
 	}
 
 	if (direct)
-		memcpy(pad, skl_call_thunk_template, tsize);
+		memcpy(pad, insn_buff, tsize);
 	else
-		text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
+		text_poke_copy_locked(pad, insn_buff, tsize, true);
 	return pad;
 }
 
@@ -291,20 +298,27 @@  void *callthunks_translate_call_dest(void *dest)
 static bool is_callthunk(void *addr)
 {
 	unsigned int tmpl_size = SKL_TMPL_SIZE;
-	void *tmpl = skl_call_thunk_template;
+	u8 insn_buff[MAX_PATCH_LEN];
 	unsigned long dest;
+	u8 *pad;
 
 	dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
 	if (!thunks_initialized || skip_addr((void *)dest))
 		return false;
 
-	return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
+	*pad = dest - tmpl_size;
+
+	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+	apply_relocation(insn_buff, tmpl_size, pad,
+			 skl_call_thunk_template, tmpl_size);
+
+	return !bcmp(pad, insn_buff, tmpl_size);
 }
 
 int x86_call_depth_emit_accounting(u8 **pprog, void *func)
 {
 	unsigned int tmpl_size = SKL_TMPL_SIZE;
-	void *tmpl = skl_call_thunk_template;
+	u8 insn_buff[MAX_PATCH_LEN];
 
 	if (!thunks_initialized)
 		return 0;
@@ -313,7 +327,11 @@  int x86_call_depth_emit_accounting(u8 **pprog, void *func)
 	if (func && is_callthunk(func))
 		return 0;
 
-	memcpy(*pprog, tmpl, tmpl_size);
+	memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+	apply_relocation(insn_buff, tmpl_size, *pprog,
+			 skl_call_thunk_template, tmpl_size);
+
+	memcpy(*pprog, insn_buff, tmpl_size);
 	*pprog += tmpl_size;
 	return tmpl_size;
 }