[RFC,13/43] x86/paravirt: Use relative reference for original instruction

Message ID d0e601438f4d140c0c383c6ad09483b34c7c572e.1682673543.git.houwenlong.hwl@antgroup.com
State New
Headers
Series x86/pie: Make kernel image's virtual address flexible |

Commit Message

Hou Wenlong April 28, 2023, 9:50 a.m. UTC
  Similar to the alternative patching, use relative reference for original
instruction rather than absolute one, which saves 8 bytes for one entry
on x86_64.  And it could generate R_X86_64_PC32 relocation instead of
R_X86_64_64 relocation, which also reduces relocation metadata on
relocatable builds. And the alignment could be hard coded to be 4 now.

Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
Cc: Thomas Garnier <thgarnie@chromium.org>
Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Cc: Kees Cook <keescook@chromium.org>
---
 arch/x86/include/asm/paravirt.h       | 10 +++++-----
 arch/x86/include/asm/paravirt_types.h |  8 ++++----
 arch/x86/kernel/alternative.c         |  8 +++++---
 arch/x86/kernel/callthunks.c          |  2 +-
 4 files changed, 15 insertions(+), 13 deletions(-)
  

Comments

Juergen Gross June 1, 2023, 9:29 a.m. UTC | #1
On 28.04.23 11:50, Hou Wenlong wrote:
> Similar to the alternative patching, use relative reference for original
> instruction rather than absolute one, which saves 8 bytes for one entry
> on x86_64.  And it could generate R_X86_64_PC32 relocation instead of
> R_X86_64_64 relocation, which also reduces relocation metadata on
> relocatable builds. And the alignment could be hard coded to be 4 now.
> 
> Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> Cc: Thomas Garnier <thgarnie@chromium.org>
> Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> Cc: Kees Cook <keescook@chromium.org>

Reviewed-by: Juergen Gross <jgross@suse.com>

I think this patch should be taken even without the series.


Juergen
  
Nadav Amit June 5, 2023, 6:40 a.m. UTC | #2
> On Jun 1, 2023, at 2:29 AM, Juergen Gross <jgross@suse.com> wrote:
> 
> On 28.04.23 11:50, Hou Wenlong wrote:
>> Similar to the alternative patching, use relative reference for original
>> instruction rather than absolute one, which saves 8 bytes for one entry
>> on x86_64.  And it could generate R_X86_64_PC32 relocation instead of
>> R_X86_64_64 relocation, which also reduces relocation metadata on
>> relocatable builds. And the alignment could be hard coded to be 4 now.
>> Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
>> Cc: Thomas Garnier <thgarnie@chromium.org>
>> Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>> Cc: Kees Cook <keescook@chromium.org>
> 
> Reviewed-by: Juergen Gross <jgross@suse.com>
> 
> I think this patch should be taken even without the series.

It looks good to me, I am just not sure what the alignment is needed
at all.

Why not to make the struct __packed (like struct alt_instr) and get rid
of all the .align directives? Am I missing something?
  
Hou Wenlong June 6, 2023, 11:35 a.m. UTC | #3
On Mon, Jun 05, 2023 at 02:40:54PM +0800, Nadav Amit wrote:
> 
> 
> > On Jun 1, 2023, at 2:29 AM, Juergen Gross <jgross@suse.com> wrote:
> > 
> > On 28.04.23 11:50, Hou Wenlong wrote:
> >> Similar to the alternative patching, use relative reference for original
> >> instruction rather than absolute one, which saves 8 bytes for one entry
> >> on x86_64.  And it could generate R_X86_64_PC32 relocation instead of
> >> R_X86_64_64 relocation, which also reduces relocation metadata on
> >> relocatable builds. And the alignment could be hard coded to be 4 now.
> >> Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> >> Cc: Thomas Garnier <thgarnie@chromium.org>
> >> Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> >> Cc: Kees Cook <keescook@chromium.org>
> > 
> > Reviewed-by: Juergen Gross <jgross@suse.com>
> > 
> > I think this patch should be taken even without the series.
> 
> It looks good to me, I am just not sure what the alignment is needed
> at all.
> 
> Why not to make the struct __packed (like struct alt_instr) and get rid
> of all the .align directives? Am I missing something?

Yes, making the struct __packed can save more space. If I understand
correctly, it could be done even without this patch but it may lead to
misaligned memory access. However, it seems to not matter as I didn't
find any related log for packing struct alt_instr. I can do such things
if needed.

Thanks.
  

Patch

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index b49778664d2b..2350ceb43db0 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -742,16 +742,16 @@  extern void default_banner(void);
 
 #else  /* __ASSEMBLY__ */
 
-#define _PVSITE(ptype, ops, word, algn)		\
+#define _PVSITE(ptype, ops)			\
 771:;						\
 	ops;					\
 772:;						\
 	.pushsection .parainstructions,"a";	\
-	 .align	algn;				\
-	 word 771b;				\
+	 .align	4;				\
+	 .long 771b-.;				\
 	 .byte ptype;				\
 	 .byte 772b-771b;			\
-	 _ASM_ALIGN;				\
+	 .align 4;				\
 	.popsection
 
 
@@ -759,7 +759,7 @@  extern void default_banner(void);
 #ifdef CONFIG_PARAVIRT_XXL
 
 #define PARA_PATCH(off)		((off) / 8)
-#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, .quad, 8)
+#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops)
 #define PARA_INDIRECT(addr)	*addr(%rip)
 
 #ifdef CONFIG_DEBUG_ENTRY
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 4acbcddddc29..982a234f5a06 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -5,7 +5,7 @@ 
 #ifndef __ASSEMBLY__
 /* These all sit in the .parainstructions section to tell us what to patch. */
 struct paravirt_patch_site {
-	u8 *instr;		/* original instructions */
+	s32 instr_offset;	/* original instructions */
 	u8 type;		/* type of this instruction */
 	u8 len;			/* length of original instruction */
 };
@@ -270,11 +270,11 @@  extern struct paravirt_patch_template pv_ops;
 #define _paravirt_alt(insn_string, type)		\
 	"771:\n\t" insn_string "\n" "772:\n"		\
 	".pushsection .parainstructions,\"a\"\n"	\
-	_ASM_ALIGN "\n"					\
-	_ASM_PTR " 771b\n"				\
+	"  .align 4\n"					\
+	"  .long 771b-.\n"				\
 	"  .byte " type "\n"				\
 	"  .byte 772b-771b\n"				\
-	_ASM_ALIGN "\n"					\
+	"  .align 4\n"					\
 	".popsection\n"
 
 /* Generate patchable code, with the default asm parameters. */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f615e0cb6d93..25c59da6c53b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1230,20 +1230,22 @@  void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
 {
 	struct paravirt_patch_site *p;
 	char insn_buff[MAX_PATCH_LEN];
+	u8 *instr;
 
 	for (p = start; p < end; p++) {
 		unsigned int used;
 
+		instr = (u8 *)&p->instr_offset + p->instr_offset;
 		BUG_ON(p->len > MAX_PATCH_LEN);
 		/* prep the buffer with the original instructions */
-		memcpy(insn_buff, p->instr, p->len);
-		used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
+		memcpy(insn_buff, instr, p->len);
+		used = paravirt_patch(p->type, insn_buff, (unsigned long)instr, p->len);
 
 		BUG_ON(used > p->len);
 
 		/* Pad the rest with nops */
 		add_nops(insn_buff + used, p->len - used);
-		text_poke_early(p->instr, insn_buff, p->len);
+		text_poke_early(instr, insn_buff, p->len);
 	}
 }
 extern struct paravirt_patch_site __start_parainstructions[],
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index ffea98f9064b..f15405acfd42 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -245,7 +245,7 @@  patch_paravirt_call_sites(struct paravirt_patch_site *start,
 	struct paravirt_patch_site *p;
 
 	for (p = start; p < end; p++)
-		patch_call(p->instr, ct);
+		patch_call((void *)&p->instr_offset + p->instr_offset, ct);
 }
 
 static __init_or_module void