[2/3] x86/callthunks: Handle %rip-relative relocations in call thunk template
Commit Message
Contrary to alternatives, relocations are currently not supported in
call thunk templates. Implement minimal support for relocations
when copying template from its storage location to handle %rip-relative
addresses.
Support for relocations will be needed when PER_CPU_VAR macro switches
to %rip-relative addressing.
The patch allows unification of ASM_INCREMENT_CALL_DEPTH, which already
uses PER_CPU_VAR macro, with INCREMENT_CALL_DEPTH, used in call thunk
template, which is currently limited to use absolute address.
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
---
arch/x86/kernel/callthunks.c | 63 ++++++++++++++++++++++++++++++++----
1 file changed, 56 insertions(+), 7 deletions(-)
Comments
On Thu, Nov 02, 2023 at 12:25:47PM +0100, Uros Bizjak wrote:
> @@ -166,13 +168,51 @@ static const u8 nops[] = {
> 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
> };
>
> +#define apply_reloc_n(n_, p_, d_) \
> + do { \
> + s32 v = *(s##n_ *)(p_); \
> + v += (d_); \
> + BUG_ON((v >> 31) != (v >> (n_-1))); \
> + *(s##n_ *)(p_) = (s##n_)v; \
> + } while (0)
> +
> +static __always_inline
> +void apply_reloc(int n, void *ptr, uintptr_t diff)
> +{
> + switch (n) {
> + case 4: apply_reloc_n(32, ptr, diff); break;
> + default: BUG();
> + }
> +}
> +
> +static void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src)
> +{
> + for (int next, i = 0; i < len; i = next) {
> + struct insn insn;
> +
> + if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
> + return;
> +
> + next = i + insn.length;
> +
> + if (insn_rip_relative(&insn))
> + apply_reloc(insn.displacement.nbytes,
> + buf + i + insn_offset_displacement(&insn),
> + src - dest);
> + }
> +}
Isn't it simpler to use apply_relocation() from alternative.c?
Remove static, add decl, stuff like that?
On Thu, Nov 2, 2023 at 12:44 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Nov 02, 2023 at 12:25:47PM +0100, Uros Bizjak wrote:
>
> > @@ -166,13 +168,51 @@ static const u8 nops[] = {
> > 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
> > };
> >
> > +#define apply_reloc_n(n_, p_, d_) \
> > + do { \
> > + s32 v = *(s##n_ *)(p_); \
> > + v += (d_); \
> > + BUG_ON((v >> 31) != (v >> (n_-1))); \
> > + *(s##n_ *)(p_) = (s##n_)v; \
> > + } while (0)
> > +
> > +static __always_inline
> > +void apply_reloc(int n, void *ptr, uintptr_t diff)
> > +{
> > + switch (n) {
> > + case 4: apply_reloc_n(32, ptr, diff); break;
> > + default: BUG();
> > + }
> > +}
> > +
> > +static void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src)
> > +{
> > + for (int next, i = 0; i < len; i = next) {
> > + struct insn insn;
> > +
> > + if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
> > + return;
> > +
> > + next = i + insn.length;
> > +
> > + if (insn_rip_relative(&insn))
> > + apply_reloc(insn.displacement.nbytes,
> > + buf + i + insn_offset_displacement(&insn),
> > + src - dest);
> > + }
> > +}
>
> Isn't it simpler to use apply_relocation() from alternative.c?
Yes, I was looking at that function, but somehow thought that it is a
bit overkill here, since we just need a %rip-relative reloc.
> Remove static, add decl, stuff like that?
On second thought, you are right. Should I move the above function
somewhere (reloc.c?) , or can I just use it from alternative.c and add
decl (where?) ?
Thanks,
Uros.
On Thu, Nov 02, 2023 at 12:50:01PM +0100, Uros Bizjak wrote:
> > Remove static, add decl, stuff like that?
>
> On second thought, you are right. Should I move the above function
> somewhere (reloc.c?) , or can I just use it from alternative.c and add
> decl (where?) ?
Yeah, leave it there for now, perhaps asm/text-patching.h ?
On Thu, Nov 2, 2023 at 12:56 PM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Nov 02, 2023 at 12:50:01PM +0100, Uros Bizjak wrote:
>
> > > Remove static, add decl, stuff like that?
> >
> > On second thought, you are right. Should I move the above function
> > somewhere (reloc.c?) , or can I just use it from alternative.c and add
> > decl (where?) ?
>
> Yeah, leave it there for now, perhaps asm/text-patching.h ?
The new version boots OK and looks like the attached patch.
Uros.
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 29832c338cdc..ba8d900f3ebe 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -18,6 +18,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL
#endif
+void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len);
+
/*
* Currently, the max observed size in the kernel code is
* JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 73be3931e4f0..66140c54d4f6 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -325,8 +325,7 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
return (target < src || target > src + src_len);
}
-static void __init_or_module noinline
-apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
+void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
{
int prev, target = 0;
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index d0922cf94c90..832eaec36e2b 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -24,6 +24,8 @@
static int __initdata_or_module debug_callthunks;
+#define MAX_PATCH_LEN (255-1)
+
#define prdbg(fmt, args...) \
do { \
if (debug_callthunks) \
@@ -169,10 +171,15 @@ static const u8 nops[] = {
static void *patch_dest(void *dest, bool direct)
{
unsigned int tsize = SKL_TMPL_SIZE;
+ u8 insn_buff[MAX_PATCH_LEN];
u8 *pad = dest - tsize;
+ memcpy(insn_buff, skl_call_thunk_template, tsize);
+ apply_relocation(insn_buff, tsize, pad,
+ skl_call_thunk_template, tsize);
+
/* Already patched? */
- if (!bcmp(pad, skl_call_thunk_template, tsize))
+ if (!bcmp(pad, insn_buff, tsize))
return pad;
/* Ensure there are nops */
@@ -182,9 +189,9 @@ static void *patch_dest(void *dest, bool direct)
}
if (direct)
- memcpy(pad, skl_call_thunk_template, tsize);
+ memcpy(pad, insn_buff, tsize);
else
- text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
+ text_poke_copy_locked(pad, insn_buff, tsize, true);
return pad;
}
@@ -281,20 +288,27 @@ void *callthunks_translate_call_dest(void *dest)
static bool is_callthunk(void *addr)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
- void *tmpl = skl_call_thunk_template;
+ u8 insn_buff[MAX_PATCH_LEN];
unsigned long dest;
+ u8 *pad;
dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
if (!thunks_initialized || skip_addr((void *)dest))
return false;
- return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
+ *pad = dest - tmpl_size;
+
+ memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+ apply_relocation(insn_buff, tmpl_size, pad,
+ skl_call_thunk_template, tmpl_size);
+
+ return !bcmp(pad, insn_buff, tmpl_size);
}
int x86_call_depth_emit_accounting(u8 **pprog, void *func)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
- void *tmpl = skl_call_thunk_template;
+ u8 insn_buff[MAX_PATCH_LEN];
if (!thunks_initialized)
return 0;
@@ -303,7 +317,11 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
if (func && is_callthunk(func))
return 0;
- memcpy(*pprog, tmpl, tmpl_size);
+ memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+ apply_relocation(insn_buff, tmpl_size, *pprog,
+ skl_call_thunk_template, tmpl_size);
+
+ memcpy(*pprog, insn_buff, tmpl_size);
*pprog += tmpl_size;
return tmpl_size;
}
@@ -24,6 +24,8 @@
static int __initdata_or_module debug_callthunks;
+#define MAX_PATCH_LEN (255-1)
+
#define prdbg(fmt, args...) \
do { \
if (debug_callthunks) \
@@ -166,13 +168,51 @@ static const u8 nops[] = {
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
};
+#define apply_reloc_n(n_, p_, d_) \
+ do { \
+ s32 v = *(s##n_ *)(p_); \
+ v += (d_); \
+ BUG_ON((v >> 31) != (v >> (n_-1))); \
+ *(s##n_ *)(p_) = (s##n_)v; \
+ } while (0)
+
+static __always_inline
+void apply_reloc(int n, void *ptr, uintptr_t diff)
+{
+ switch (n) {
+ case 4: apply_reloc_n(32, ptr, diff); break;
+ default: BUG();
+ }
+}
+
+static void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src)
+{
+ for (int next, i = 0; i < len; i = next) {
+ struct insn insn;
+
+ if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i])))
+ return;
+
+ next = i + insn.length;
+
+ if (insn_rip_relative(&insn))
+ apply_reloc(insn.displacement.nbytes,
+ buf + i + insn_offset_displacement(&insn),
+ src - dest);
+ }
+}
+
static void *patch_dest(void *dest, bool direct)
{
unsigned int tsize = SKL_TMPL_SIZE;
+ u8 insn_buff[MAX_PATCH_LEN];
u8 *pad = dest - tsize;
+ memcpy(insn_buff, skl_call_thunk_template, tsize);
+ apply_relocation(insn_buff, tsize, pad, skl_call_thunk_template);
+
/* Already patched? */
- if (!bcmp(pad, skl_call_thunk_template, tsize))
+ if (!bcmp(pad, insn_buff, tsize))
return pad;
/* Ensure there are nops */
@@ -182,9 +222,9 @@ static void *patch_dest(void *dest, bool direct)
}
if (direct)
- memcpy(pad, skl_call_thunk_template, tsize);
+ memcpy(pad, insn_buff, tsize);
else
- text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
+ text_poke_copy_locked(pad, insn_buff, tsize, true);
return pad;
}
@@ -281,20 +321,26 @@ void *callthunks_translate_call_dest(void *dest)
static bool is_callthunk(void *addr)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
- void *tmpl = skl_call_thunk_template;
+ u8 insn_buff[MAX_PATCH_LEN];
unsigned long dest;
+ u8 *pad;
dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
if (!thunks_initialized || skip_addr((void *)dest))
return false;
- return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
+ *pad = dest - tmpl_size;
+
+ memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+ apply_relocation(insn_buff, tmpl_size, pad, skl_call_thunk_template);
+
+ return !bcmp(pad, insn_buff, tmpl_size);
}
int x86_call_depth_emit_accounting(u8 **pprog, void *func)
{
unsigned int tmpl_size = SKL_TMPL_SIZE;
- void *tmpl = skl_call_thunk_template;
+ u8 insn_buff[MAX_PATCH_LEN];
if (!thunks_initialized)
return 0;
@@ -303,7 +349,10 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
if (func && is_callthunk(func))
return 0;
- memcpy(*pprog, tmpl, tmpl_size);
+ memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
+ apply_relocation(insn_buff, tmpl_size, *pprog, skl_call_thunk_template);
+
+ memcpy(*pprog, insn_buff, tmpl_size);
*pprog += tmpl_size;
return tmpl_size;
}