x86/paravirt: Use relative reference for original instruction

Message ID d0fb2176864ed7883b0e53353b663158df2f61d6.1669279198.git.houwenlong.hwl@antgroup.com
State New
Headers
Series x86/paravirt: Use relative reference for original instruction |

Commit Message

Hou Wenlong Nov. 24, 2022, 8:51 a.m. UTC
  Similar to the alternative patching, use relative reference for original
instruction rather than absolute one, which saves 8 bytes for one entry
on x86_64.  And it could generate R_X86_64_PC32 relocation instead of
R_X86_64_64 relocation, which also reduces relocation metadata on
relocatable builds.

Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
---
 arch/x86/include/asm/paravirt.h       | 6 +++---
 arch/x86/include/asm/paravirt_types.h | 4 ++--
 arch/x86/kernel/alternative.c         | 8 +++++---
 3 files changed, 10 insertions(+), 8 deletions(-)
  

Comments

Juergen Gross Nov. 24, 2022, 10:18 a.m. UTC | #1
On 24.11.22 09:51, Hou Wenlong wrote:
> Similar to the alternative patching, use relative reference for original
> instruction rather than absolute one, which saves 8 bytes for one entry
> on x86_64.  And it could generate R_X86_64_PC32 relocation instead of

8 bytes saved? I think those are 4 bytes only.

> R_X86_64_64 relocation, which also reduces relocation metadata on
> relocatable builds.
> 
> Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> ---
>   arch/x86/include/asm/paravirt.h       | 6 +++---
>   arch/x86/include/asm/paravirt_types.h | 4 ++--
>   arch/x86/kernel/alternative.c         | 8 +++++---
>   3 files changed, 10 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index 2851bc2339d5..2cbe9b64e103 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -735,13 +735,13 @@ extern void default_banner(void);
>   
>   #else  /* __ASSEMBLY__ */
>   
> -#define _PVSITE(ptype, ops, word, algn)		\
> +#define _PVSITE(ptype, ops, algn)		\

Would you please drop the algn parameter, too? It isn't needed anymore
as the alignment can be hard coded to be 4 now. This would need to be
adjusted in the _paravirt_alt() macro, too.

>   771:;						\
>   	ops;					\
>   772:;						\
>   	.pushsection .parainstructions,"a";	\
>   	 .align	algn;				\
> -	 word 771b;				\
> +	 .long 771b-.;				\
>   	 .byte ptype;				\
>   	 .byte 772b-771b;			\
>   	 _ASM_ALIGN;				\
> @@ -752,7 +752,7 @@ extern void default_banner(void);
>   #ifdef CONFIG_PARAVIRT_XXL
>   
>   #define PARA_PATCH(off)		((off) / 8)
> -#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, .quad, 8)
> +#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, 8)
>   #define PARA_INDIRECT(addr)	*addr(%rip)
>   
>   #ifdef CONFIG_DEBUG_ENTRY
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> index 8c1da419260f..19f709cf7df9 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -5,7 +5,7 @@
>   #ifndef __ASSEMBLY__
>   /* These all sit in the .parainstructions section to tell us what to patch. */
>   struct paravirt_patch_site {
> -	u8 *instr;		/* original instructions */
> +	s32 instr_offset;	/* original instructions */
>   	u8 type;		/* type of this instruction */
>   	u8 len;			/* length of original instruction */
>   };
> @@ -274,7 +274,7 @@ extern struct paravirt_patch_template pv_ops;
>   	"771:\n\t" insn_string "\n" "772:\n"		\
>   	".pushsection .parainstructions,\"a\"\n"	\
>   	_ASM_ALIGN "\n"					\
> -	_ASM_PTR " 771b\n"				\
> +	"  .long 771b-.\n"				\
>   	"  .byte " type "\n"				\
>   	"  .byte 772b-771b\n"				\
>   	_ASM_ALIGN "\n"					\
> diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> index 111b809f0ac2..6eea563a098d 100644
> --- a/arch/x86/kernel/alternative.c
> +++ b/arch/x86/kernel/alternative.c
> @@ -1232,20 +1232,22 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
>   {
>   	struct paravirt_patch_site *p;
>   	char insn_buff[MAX_PATCH_LEN];
> +	u8 *instr;
>   
>   	for (p = start; p < end; p++) {
>   		unsigned int used;
>   
> +		instr = (u8 *)&p->instr_offset + p->instr_offset;
>   		BUG_ON(p->len > MAX_PATCH_LEN);
>   		/* prep the buffer with the original instructions */
> -		memcpy(insn_buff, p->instr, p->len);
> -		used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
> +		memcpy(insn_buff, instr, p->len);
> +		used = paravirt_patch(p->type, insn_buff, (unsigned long)instr, p->len);
>   
>   		BUG_ON(used > p->len);
>   
>   		/* Pad the rest with nops */
>   		add_nops(insn_buff + used, p->len - used);
> -		text_poke_early(p->instr, insn_buff, p->len);
> +		text_poke_early(instr, insn_buff, p->len);
>   	}
>   }
>   extern struct paravirt_patch_site __start_parainstructions[],


Juergen
  
Hou Wenlong Nov. 24, 2022, 11:06 a.m. UTC | #2
On Thu, Nov 24, 2022 at 11:18:52AM +0100, Juergen Gross wrote:
> On 24.11.22 09:51, Hou Wenlong wrote:
> >Similar to the alternative patching, use relative reference for original
> >instruction rather than absolute one, which saves 8 bytes for one entry
> >on x86_64.  And it could generate R_X86_64_PC32 relocation instead of
> 
> 8 bytes saved? I think those are 4 bytes only.
>
The corresponding C structure paravirt_patch_site is not packed, Before this,
its size is 16 bytes in x86_64,due to the alignment of 8 bytes. Now the alignment
is 4 bytes, so the size is 8 bytes.

> >R_X86_64_64 relocation, which also reduces relocation metadata on
> >relocatable builds.
> >
> >Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
> >---
> >  arch/x86/include/asm/paravirt.h       | 6 +++---
> >  arch/x86/include/asm/paravirt_types.h | 4 ++--
> >  arch/x86/kernel/alternative.c         | 8 +++++---
> >  3 files changed, 10 insertions(+), 8 deletions(-)
> >
> >diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> >index 2851bc2339d5..2cbe9b64e103 100644
> >--- a/arch/x86/include/asm/paravirt.h
> >+++ b/arch/x86/include/asm/paravirt.h
> >@@ -735,13 +735,13 @@ extern void default_banner(void);
> >  #else  /* __ASSEMBLY__ */
> >-#define _PVSITE(ptype, ops, word, algn)		\
> >+#define _PVSITE(ptype, ops, algn)		\
> 
> Would you please drop the algn parameter, too? It isn't needed anymore
> as the alignment can be hard coded to be 4 now. This would need to be
> adjusted in the _paravirt_alt() macro, too.
> 
OK, since the aligment is 4 bytes now, it seems that _ASM_ALIGN could
be dropped too?

> >  771:;						\
> >  	ops;					\
> >  772:;						\
> >  	.pushsection .parainstructions,"a";	\
> >  	 .align	algn;				\
> >-	 word 771b;				\
> >+	 .long 771b-.;				\
> >  	 .byte ptype;				\
> >  	 .byte 772b-771b;			\
> >  	 _ASM_ALIGN;				\
> >@@ -752,7 +752,7 @@ extern void default_banner(void);
> >  #ifdef CONFIG_PARAVIRT_XXL
> >  #define PARA_PATCH(off)		((off) / 8)
> >-#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, .quad, 8)
> >+#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, 8)
> >  #define PARA_INDIRECT(addr)	*addr(%rip)
> >  #ifdef CONFIG_DEBUG_ENTRY
> >diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> >index 8c1da419260f..19f709cf7df9 100644
> >--- a/arch/x86/include/asm/paravirt_types.h
> >+++ b/arch/x86/include/asm/paravirt_types.h
> >@@ -5,7 +5,7 @@
> >  #ifndef __ASSEMBLY__
> >  /* These all sit in the .parainstructions section to tell us what to patch. */
> >  struct paravirt_patch_site {
> >-	u8 *instr;		/* original instructions */
> >+	s32 instr_offset;	/* original instructions */
> >  	u8 type;		/* type of this instruction */
> >  	u8 len;			/* length of original instruction */
> >  };
> >@@ -274,7 +274,7 @@ extern struct paravirt_patch_template pv_ops;
> >  	"771:\n\t" insn_string "\n" "772:\n"		\
> >  	".pushsection .parainstructions,\"a\"\n"	\
> >  	_ASM_ALIGN "\n"					\
> >-	_ASM_PTR " 771b\n"				\
> >+	"  .long 771b-.\n"				\
> >  	"  .byte " type "\n"				\
> >  	"  .byte 772b-771b\n"				\
> >  	_ASM_ALIGN "\n"					\
> >diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
> >index 111b809f0ac2..6eea563a098d 100644
> >--- a/arch/x86/kernel/alternative.c
> >+++ b/arch/x86/kernel/alternative.c
> >@@ -1232,20 +1232,22 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
> >  {
> >  	struct paravirt_patch_site *p;
> >  	char insn_buff[MAX_PATCH_LEN];
> >+	u8 *instr;
> >  	for (p = start; p < end; p++) {
> >  		unsigned int used;
> >+		instr = (u8 *)&p->instr_offset + p->instr_offset;
> >  		BUG_ON(p->len > MAX_PATCH_LEN);
> >  		/* prep the buffer with the original instructions */
> >-		memcpy(insn_buff, p->instr, p->len);
> >-		used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
> >+		memcpy(insn_buff, instr, p->len);
> >+		used = paravirt_patch(p->type, insn_buff, (unsigned long)instr, p->len);
> >  		BUG_ON(used > p->len);
> >  		/* Pad the rest with nops */
> >  		add_nops(insn_buff + used, p->len - used);
> >-		text_poke_early(p->instr, insn_buff, p->len);
> >+		text_poke_early(instr, insn_buff, p->len);
> >  	}
> >  }
> >  extern struct paravirt_patch_site __start_parainstructions[],
> 
> 
> Juergen

> pub  2048R/28BF132F 2014-06-02 Juergen Gross <jg@pfupf.net>
> uid                            Juergen Gross <jgross@suse.com>
> uid                            Juergen Gross <jgross@novell.com>
> uid                            Juergen Gross <jgross@suse.de>
> sub  2048R/16375B53 2014-06-02
  
Juergen Gross Nov. 24, 2022, 11:18 a.m. UTC | #3
On 24.11.22 12:06, Hou Wenlong wrote:
> On Thu, Nov 24, 2022 at 11:18:52AM +0100, Juergen Gross wrote:
>> On 24.11.22 09:51, Hou Wenlong wrote:
>>> Similar to the alternative patching, use relative reference for original
>>> instruction rather than absolute one, which saves 8 bytes for one entry
>>> on x86_64.  And it could generate R_X86_64_PC32 relocation instead of
>>
>> 8 bytes saved? I think those are 4 bytes only.
>>
> The corresponding C structure paravirt_patch_site is not packed, Before this,
> its size is 16 bytes in x86_64,due to the alignment of 8 bytes. Now the alignment
> is 4 bytes, so the size is 8 bytes.

Oh, I've looked at Linus' tree. You seem to have based your reasoning on
the tip/paravirt branch, which is fine.

> 
>>> R_X86_64_64 relocation, which also reduces relocation metadata on
>>> relocatable builds.
>>>
>>> Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
>>> ---
>>>   arch/x86/include/asm/paravirt.h       | 6 +++---
>>>   arch/x86/include/asm/paravirt_types.h | 4 ++--
>>>   arch/x86/kernel/alternative.c         | 8 +++++---
>>>   3 files changed, 10 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
>>> index 2851bc2339d5..2cbe9b64e103 100644
>>> --- a/arch/x86/include/asm/paravirt.h
>>> +++ b/arch/x86/include/asm/paravirt.h
>>> @@ -735,13 +735,13 @@ extern void default_banner(void);
>>>   #else  /* __ASSEMBLY__ */
>>> -#define _PVSITE(ptype, ops, word, algn)		\
>>> +#define _PVSITE(ptype, ops, algn)		\
>>
>> Would you please drop the algn parameter, too? It isn't needed anymore
>> as the alignment can be hard coded to be 4 now. This would need to be
>> adjusted in the _paravirt_alt() macro, too.
>>
> OK, since the aligment is 4 bytes now, it seems that _ASM_ALIGN could
> be dropped too?

That's what I meant with the adjustment of _paravirt_alt().

I wouldn't drop _ASM_ALIGN, but replace it with ".align 4".


Juergen
  

Patch

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2851bc2339d5..2cbe9b64e103 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -735,13 +735,13 @@  extern void default_banner(void);
 
 #else  /* __ASSEMBLY__ */
 
-#define _PVSITE(ptype, ops, word, algn)		\
+#define _PVSITE(ptype, ops, algn)		\
 771:;						\
 	ops;					\
 772:;						\
 	.pushsection .parainstructions,"a";	\
 	 .align	algn;				\
-	 word 771b;				\
+	 .long 771b-.;				\
 	 .byte ptype;				\
 	 .byte 772b-771b;			\
 	 _ASM_ALIGN;				\
@@ -752,7 +752,7 @@  extern void default_banner(void);
 #ifdef CONFIG_PARAVIRT_XXL
 
 #define PARA_PATCH(off)		((off) / 8)
-#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, .quad, 8)
+#define PARA_SITE(ptype, ops)	_PVSITE(ptype, ops, 8)
 #define PARA_INDIRECT(addr)	*addr(%rip)
 
 #ifdef CONFIG_DEBUG_ENTRY
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8c1da419260f..19f709cf7df9 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -5,7 +5,7 @@ 
 #ifndef __ASSEMBLY__
 /* These all sit in the .parainstructions section to tell us what to patch. */
 struct paravirt_patch_site {
-	u8 *instr;		/* original instructions */
+	s32 instr_offset;	/* original instructions */
 	u8 type;		/* type of this instruction */
 	u8 len;			/* length of original instruction */
 };
@@ -274,7 +274,7 @@  extern struct paravirt_patch_template pv_ops;
 	"771:\n\t" insn_string "\n" "772:\n"		\
 	".pushsection .parainstructions,\"a\"\n"	\
 	_ASM_ALIGN "\n"					\
-	_ASM_PTR " 771b\n"				\
+	"  .long 771b-.\n"				\
 	"  .byte " type "\n"				\
 	"  .byte 772b-771b\n"				\
 	_ASM_ALIGN "\n"					\
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 111b809f0ac2..6eea563a098d 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1232,20 +1232,22 @@  void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
 {
 	struct paravirt_patch_site *p;
 	char insn_buff[MAX_PATCH_LEN];
+	u8 *instr;
 
 	for (p = start; p < end; p++) {
 		unsigned int used;
 
+		instr = (u8 *)&p->instr_offset + p->instr_offset;
 		BUG_ON(p->len > MAX_PATCH_LEN);
 		/* prep the buffer with the original instructions */
-		memcpy(insn_buff, p->instr, p->len);
-		used = paravirt_patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
+		memcpy(insn_buff, instr, p->len);
+		used = paravirt_patch(p->type, insn_buff, (unsigned long)instr, p->len);
 
 		BUG_ON(used > p->len);
 
 		/* Pad the rest with nops */
 		add_nops(insn_buff + used, p->len - used);
-		text_poke_early(p->instr, insn_buff, p->len);
+		text_poke_early(instr, insn_buff, p->len);
 	}
 }
 extern struct paravirt_patch_site __start_parainstructions[],