[5/9] x86/hyperv: Use vmmcall to implement Hyper-V hypercall in sev-snp enlightened guest

Message ID 20230601151624.1757616-6-ltykernel@gmail.com
State New
Series x86/hyperv: Add AMD sev-snp enlightened guest support on hyperv

Commit Message

Tianyu Lan June 1, 2023, 3:16 p.m. UTC
  From: Tianyu Lan <tiala@microsoft.com>

In an SEV-SNP enlightened guest, a Hyper-V hypercall needs
to use vmmcall to trigger a vmexit and notify the hypervisor
to handle the hypercall request.

There is no x86 SEV-SNP feature flag support so far, and
hardware provides the MSR_AMD64_SEV register to check SEV-SNP
capability with the MSR_AMD64_SEV_ENABLED bit. ALTERNATIVE can't
work without an SEV-SNP x86 feature flag. One may be added later
when the associated flag is introduced.

Signed-off-by: Tianyu Lan <tiala@microsoft.com>
---
 arch/x86/include/asm/mshyperv.h | 44 ++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 11 deletions(-)
  

Comments

Vitaly Kuznetsov June 5, 2023, 1 p.m. UTC | #1
Tianyu Lan <ltykernel@gmail.com> writes:

> From: Tianyu Lan <tiala@microsoft.com>
>
> In an SEV-SNP enlightened guest, a Hyper-V hypercall needs
> to use vmmcall to trigger a vmexit and notify the hypervisor
> to handle the hypercall request.
>
> There is no x86 SEV-SNP feature flag support so far, and
> hardware provides the MSR_AMD64_SEV register to check SEV-SNP
> capability with the MSR_AMD64_SEV_ENABLED bit. ALTERNATIVE can't
> work without an SEV-SNP x86 feature flag. One may be added later
> when the associated flag is introduced.
>
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
>  arch/x86/include/asm/mshyperv.h | 44 ++++++++++++++++++++++++---------
>  1 file changed, 33 insertions(+), 11 deletions(-)
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 31c476f4e656..d859d7c5f5e8 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -61,16 +61,25 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>  	u64 hv_status;
>  
>  #ifdef CONFIG_X86_64
> -	if (!hv_hypercall_pg)
> -		return U64_MAX;
> +	if (hv_isolation_type_en_snp()) {

Would it be possible to redo 'hv_isolation_type_en_snp()' into a static
inline doing static_branch_unlikely(), so we avoid the function-call
penalty here?

> +		__asm__ __volatile__("mov %4, %%r8\n"
> +				     "vmmcall"
> +				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +				       "+c" (control), "+d" (input_address)
> +				     :  "r" (output_address)
> +				     : "cc", "memory", "r8", "r9", "r10", "r11");
> +	} else {
> +		if (!hv_hypercall_pg)
> +			return U64_MAX;
>  
> -	__asm__ __volatile__("mov %4, %%r8\n"
> -			     CALL_NOSPEC
> -			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -			       "+c" (control), "+d" (input_address)
> -			     :  "r" (output_address),
> -				THUNK_TARGET(hv_hypercall_pg)
> -			     : "cc", "memory", "r8", "r9", "r10", "r11");
> +		__asm__ __volatile__("mov %4, %%r8\n"
> +				     CALL_NOSPEC
> +				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +				       "+c" (control), "+d" (input_address)
> +				     :  "r" (output_address),
> +					THUNK_TARGET(hv_hypercall_pg)
> +				     : "cc", "memory", "r8", "r9", "r10", "r11");
> +	}
>  #else
>  	u32 input_address_hi = upper_32_bits(input_address);
>  	u32 input_address_lo = lower_32_bits(input_address);
> @@ -104,7 +113,13 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
>  	u64 hv_status;
>  
>  #ifdef CONFIG_X86_64
> -	{
> +	if (hv_isolation_type_en_snp()) {
> +		__asm__ __volatile__(
> +				"vmmcall"
> +				: "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +				"+c" (control), "+d" (input1)
> +				:: "cc", "r8", "r9", "r10", "r11");
> +	} else {
>  		__asm__ __volatile__(CALL_NOSPEC
>  				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
>  				       "+c" (control), "+d" (input1)
> @@ -149,7 +164,14 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
>  	u64 hv_status;
>  
>  #ifdef CONFIG_X86_64
> -	{
> +	if (hv_isolation_type_en_snp()) {
> +		__asm__ __volatile__("mov %4, %%r8\n"
> +				     "vmmcall"
> +				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +				       "+c" (control), "+d" (input1)
> +				     : "r" (input2)
> +				     : "cc", "r8", "r9", "r10", "r11");
> +	} else {
>  		__asm__ __volatile__("mov %4, %%r8\n"
>  				     CALL_NOSPEC
>  				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
  
Peter Zijlstra June 8, 2023, 1:21 p.m. UTC | #2
On Thu, Jun 01, 2023 at 11:16:18AM -0400, Tianyu Lan wrote:
> From: Tianyu Lan <tiala@microsoft.com>
> 
> In an SEV-SNP enlightened guest, a Hyper-V hypercall needs
> to use vmmcall to trigger a vmexit and notify the hypervisor
> to handle the hypercall request.
>
> There is no x86 SEV-SNP feature flag support so far, and
> hardware provides the MSR_AMD64_SEV register to check SEV-SNP
> capability with the MSR_AMD64_SEV_ENABLED bit. ALTERNATIVE can't
> work without an SEV-SNP x86 feature flag. One may be added later
> when the associated flag is introduced.
> 
> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
> ---
>  arch/x86/include/asm/mshyperv.h | 44 ++++++++++++++++++++++++---------
>  1 file changed, 33 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 31c476f4e656..d859d7c5f5e8 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -61,16 +61,25 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>  	u64 hv_status;
>  
>  #ifdef CONFIG_X86_64
> -	if (!hv_hypercall_pg)
> -		return U64_MAX;
> +	if (hv_isolation_type_en_snp()) {
> +		__asm__ __volatile__("mov %4, %%r8\n"
> +				     "vmmcall"
> +				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +				       "+c" (control), "+d" (input_address)
> +				     :  "r" (output_address)
> +				     : "cc", "memory", "r8", "r9", "r10", "r11");
> +	} else {
> +		if (!hv_hypercall_pg)
> +			return U64_MAX;
>  
> -	__asm__ __volatile__("mov %4, %%r8\n"
> -			     CALL_NOSPEC
> -			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> -			       "+c" (control), "+d" (input_address)
> -			     :  "r" (output_address),
> -				THUNK_TARGET(hv_hypercall_pg)
> -			     : "cc", "memory", "r8", "r9", "r10", "r11");
> +		__asm__ __volatile__("mov %4, %%r8\n"
> +				     CALL_NOSPEC
> +				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> +				       "+c" (control), "+d" (input_address)
> +				     :  "r" (output_address),
> +					THUNK_TARGET(hv_hypercall_pg)
> +				     : "cc", "memory", "r8", "r9", "r10", "r11");
> +	}
>  #else

Remains unanswered:

https://lkml.kernel.org/r/20230516102912.GG2587705%40hirez.programming.kicks-ass.net

Would this not generate better code with an alternative?
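
For reference, a sketch of the shape being suggested (a complete,
verified version appears later in the thread); X86_FEATURE_SEV_SNP is a
placeholder here, since no such flag existed at the time:

	/* X86_FEATURE_SEV_SNP is a placeholder flag in this sketch. */
	asm volatile("mov %[output], %%r8\n"
		     ALTERNATIVE(CALL_NOSPEC, "vmmcall", X86_FEATURE_SEV_SNP)
		     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
		       "+c" (control), "+d" (input_address)
		     : [output] "r" (output_address),
		       THUNK_TARGET(hv_hypercall_pg)
		     : "cc", "memory", "r8", "r9", "r10", "r11");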
  
Tianyu Lan June 8, 2023, 3:15 p.m. UTC | #3
On 6/8/2023 9:21 PM, Peter Zijlstra wrote:
> On Thu, Jun 01, 2023 at 11:16:18AM -0400, Tianyu Lan wrote:
>> From: Tianyu Lan <tiala@microsoft.com>
>>
>> In an SEV-SNP enlightened guest, a Hyper-V hypercall needs
>> to use vmmcall to trigger a vmexit and notify the hypervisor
>> to handle the hypercall request.
>>
>> There is no x86 SEV-SNP feature flag support so far, and
>> hardware provides the MSR_AMD64_SEV register to check SEV-SNP
>> capability with the MSR_AMD64_SEV_ENABLED bit. ALTERNATIVE can't
>> work without an SEV-SNP x86 feature flag. One may be added later
>> when the associated flag is introduced.
>>
>> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
>> ---
>>   arch/x86/include/asm/mshyperv.h | 44 ++++++++++++++++++++++++---------
>>   1 file changed, 33 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index 31c476f4e656..d859d7c5f5e8 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -61,16 +61,25 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>>   	u64 hv_status;
>>   
>>   #ifdef CONFIG_X86_64
>> -	if (!hv_hypercall_pg)
>> -		return U64_MAX;
>> +	if (hv_isolation_type_en_snp()) {
>> +		__asm__ __volatile__("mov %4, %%r8\n"
>> +				     "vmmcall"
>> +				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
>> +				       "+c" (control), "+d" (input_address)
>> +				     :  "r" (output_address)
>> +				     : "cc", "memory", "r8", "r9", "r10", "r11");
>> +	} else {
>> +		if (!hv_hypercall_pg)
>> +			return U64_MAX;
>>   
>> -	__asm__ __volatile__("mov %4, %%r8\n"
>> -			     CALL_NOSPEC
>> -			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
>> -			       "+c" (control), "+d" (input_address)
>> -			     :  "r" (output_address),
>> -				THUNK_TARGET(hv_hypercall_pg)
>> -			     : "cc", "memory", "r8", "r9", "r10", "r11");
>> +		__asm__ __volatile__("mov %4, %%r8\n"
>> +				     CALL_NOSPEC
>> +				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
>> +				       "+c" (control), "+d" (input_address)
>> +				     :  "r" (output_address),
>> +					THUNK_TARGET(hv_hypercall_pg)
>> +				     : "cc", "memory", "r8", "r9", "r10", "r11");
>> +	}
>>   #else
> 
> Remains unanswered:
> 
> https://lkml.kernel.org/r/20230516102912.GG2587705%40hirez.programming.kicks-ass.net
> 
> Would this not generate better code with an alternative?


Hi Peter:
	Thanks for the review. I put the explanation in the change log:

"There is no x86 SEV-SNP feature (X86_FEATURE_SEV_SNP) flag
support so far, and hardware provides the MSR_AMD64_SEV register
to check SEV-SNP capability with the MSR_AMD64_SEV_ENABLED bit.
ALTERNATIVE can't work without an SEV-SNP x86 feature flag."
There is no cpuid leaf bit to check the AMD SEV-SNP feature.

Hyper-V didn't provide SEV and SEV-ES guests before, so we may
reuse the X86_FEATURE_SEV and X86_FEATURE_SEV_ES flags as the
alternative feature check for Hyper-V SEV-SNP guests. Will
refresh the patch.
  
Tianyu Lan June 27, 2023, 10:57 a.m. UTC | #4
On 6/8/2023 11:15 PM, Tianyu Lan wrote:
> On 6/8/2023 9:21 PM, Peter Zijlstra wrote:
>> On Thu, Jun 01, 2023 at 11:16:18AM -0400, Tianyu Lan wrote:
>>> From: Tianyu Lan <tiala@microsoft.com>
>>>
>>> In an SEV-SNP enlightened guest, a Hyper-V hypercall needs
>>> to use vmmcall to trigger a vmexit and notify the hypervisor
>>> to handle the hypercall request.
>>>
>>> There is no x86 SEV-SNP feature flag support so far, and
>>> hardware provides the MSR_AMD64_SEV register to check SEV-SNP
>>> capability with the MSR_AMD64_SEV_ENABLED bit. ALTERNATIVE can't
>>> work without an SEV-SNP x86 feature flag. One may be added later
>>> when the associated flag is introduced.
>>>
>>> Signed-off-by: Tianyu Lan <tiala@microsoft.com>
>>> ---
>>>   arch/x86/include/asm/mshyperv.h | 44 ++++++++++++++++++++++++---------
>>>   1 file changed, 33 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>>> index 31c476f4e656..d859d7c5f5e8 100644
>>> --- a/arch/x86/include/asm/mshyperv.h
>>> +++ b/arch/x86/include/asm/mshyperv.h
>>> @@ -61,16 +61,25 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
>>>       u64 hv_status;
>>>   #ifdef CONFIG_X86_64
>>> -    if (!hv_hypercall_pg)
>>> -        return U64_MAX;
>>> +    if (hv_isolation_type_en_snp()) {
>>> +        __asm__ __volatile__("mov %4, %%r8\n"
>>> +                     "vmmcall"
>>> +                     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
>>> +                       "+c" (control), "+d" (input_address)
>>> +                     :  "r" (output_address)
>>> +                     : "cc", "memory", "r8", "r9", "r10", "r11");
>>> +    } else {
>>> +        if (!hv_hypercall_pg)
>>> +            return U64_MAX;
>>> -    __asm__ __volatile__("mov %4, %%r8\n"
>>> -                 CALL_NOSPEC
>>> -                 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
>>> -                   "+c" (control), "+d" (input_address)
>>> -                 :  "r" (output_address),
>>> -                THUNK_TARGET(hv_hypercall_pg)
>>> -                 : "cc", "memory", "r8", "r9", "r10", "r11");
>>> +        __asm__ __volatile__("mov %4, %%r8\n"
>>> +                     CALL_NOSPEC
>>> +                     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
>>> +                       "+c" (control), "+d" (input_address)
>>> +                     :  "r" (output_address),
>>> +                    THUNK_TARGET(hv_hypercall_pg)
>>> +                     : "cc", "memory", "r8", "r9", "r10", "r11");
>>> +    }
>>>   #else
>>
>> Remains unanswered:
>>
>> https://lkml.kernel.org/r/20230516102912.GG2587705%40hirez.programming.kicks-ass.net
>>
>> Would this not generate better code with an alternative?
> 
> 
> Hi Peter:
>      Thanks for the review. I put the explanation in the change log:
> 
> "There is no x86 SEV-SNP feature (X86_FEATURE_SEV_SNP) flag
> support so far, and hardware provides the MSR_AMD64_SEV register
> to check SEV-SNP capability with the MSR_AMD64_SEV_ENABLED bit.
> ALTERNATIVE can't work without an SEV-SNP x86 feature flag."
> There is no cpuid leaf bit to check the AMD SEV-SNP feature.
> 
> Hyper-V didn't provide SEV and SEV-ES guests before, so we may
> reuse the X86_FEATURE_SEV and X86_FEATURE_SEV_ES flags as the
> alternative feature check for Hyper-V SEV-SNP guests. Will
> refresh the patch.
> 

Hi Peter:
      I tried using an alternative for "vmmcall" and CALL_NOSPEC in a
single inline assembly statement. The operand lists differ in SEV-SNP
mode: when SEV-SNP is enabled, thunk_target is not required, while it's
necessary in non-SEV-SNP mode. Do you have any idea how to differentiate
the operands in a single inline assembly statement, just like
ALTERNATIVE does for the assembler template?
  
Peter Zijlstra June 27, 2023, 11:50 a.m. UTC | #5
On Tue, Jun 27, 2023 at 06:57:28PM +0800, Tianyu Lan wrote:

> > "There is no x86 SEV SNP feature(X86_FEATURE_SEV_SNP) flag

I'm sure we can arrange such a feature if we need it, this isn't rocket
science. Boris?

> > support so far, and hardware provides the MSR_AMD64_SEV register
> > to check SEV-SNP capability with the MSR_AMD64_SEV_ENABLED bit.
> > ALTERNATIVE can't work without an SEV-SNP x86 feature flag."
> > There is no cpuid leaf bit to check the AMD SEV-SNP feature.
> > 
> > Hyper-V didn't provide SEV and SEV-ES guests before, so we may
> > reuse the X86_FEATURE_SEV and X86_FEATURE_SEV_ES flags as the
> > alternative feature check for Hyper-V SEV-SNP guests. Will
> > refresh the patch.
> > 
> 
> Hi Peter:
>      I tried using an alternative for "vmmcall" and CALL_NOSPEC in a
> single inline assembly statement. The operand lists differ in SEV-SNP
> mode: when SEV-SNP is enabled, thunk_target is not required, while it's
> necessary in non-SEV-SNP mode. Do you have any idea how to differentiate
> the operands in a single inline assembly statement, just like
> ALTERNATIVE does for the assembler template?

This seems to work; it's a bit magical for having a nested ALTERNATIVE
but the output seems correct (the outer alternative comes last in
.altinstructions and thus has precedence). Sure, the [thunk_target] input
goes unused in one of the alternatives, but who cares.


static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
{
	u64 input_address = input ? virt_to_phys(input) : 0;
	u64 output_address = output ? virt_to_phys(output) : 0;
	u64 hv_status;

#ifdef CONFIG_X86_64
	if (!hv_hypercall_pg)
		return U64_MAX;

#if 0
	if (hv_isolation_type_en_snp()) {
		__asm__ __volatile__("mov %4, %%r8\n"
				     "vmmcall"
				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
				       "+c" (control), "+d" (input_address)
				     :  "r" (output_address)
				     : "cc", "memory", "r8", "r9", "r10", "r11");
	} else {
		__asm__ __volatile__("mov %4, %%r8\n"
				     CALL_NOSPEC
				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
				       "+c" (control), "+d" (input_address)
				     :  "r" (output_address),
					THUNK_TARGET(hv_hypercall_pg)
				     : "cc", "memory", "r8", "r9", "r10", "r11");
	}
#endif

	asm volatile("mov %[output], %%r8\n"
		     ALTERNATIVE(CALL_NOSPEC, "vmmcall", X86_FEATURE_SEV_ES)
		     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
		       "+c" (control), "+d" (input_address)
		     : [output] "r" (output_address),
		       THUNK_TARGET(hv_hypercall_pg)
		     : "cc", "memory", "r8", "r9", "r10", "r11");

#else
	u32 input_address_hi = upper_32_bits(input_address);
	u32 input_address_lo = lower_32_bits(input_address);
	u32 output_address_hi = upper_32_bits(output_address);
	u32 output_address_lo = lower_32_bits(output_address);

	if (!hv_hypercall_pg)
		return U64_MAX;

	__asm__ __volatile__(CALL_NOSPEC
			     : "=A" (hv_status),
			       "+c" (input_address_lo), ASM_CALL_CONSTRAINT
			     : "A" (control),
			       "b" (input_address_hi),
			       "D"(output_address_hi), "S"(output_address_lo),
			       THUNK_TARGET(hv_hypercall_pg)
			     : "cc", "memory");
#endif /* !x86_64 */
	return hv_status;
}

(in actual fact x86_64-defconfig + kvm_guest.config + HYPERV)

$ ./scripts/objdump-func defconfig-build/arch/x86/hyperv/mmu.o hv_do_hypercall
0000 0000000000000cd0 <hv_do_hypercall.constprop.0>:
0000      cd0:  48 89 f9                mov    %rdi,%rcx
0003      cd3:  31 d2                   xor    %edx,%edx
0005      cd5:  48 85 f6                test   %rsi,%rsi
0008      cd8:  74 1b                   je     cf5 <hv_do_hypercall.constprop.0+0x25>
000a      cda:  b8 00 00 00 80          mov    $0x80000000,%eax
000f      cdf:  48 01 c6                add    %rax,%rsi
0012      ce2:  72 38                   jb     d1c <hv_do_hypercall.constprop.0+0x4c>
0014      ce4:  48 c7 c2 00 00 00 80    mov    $0xffffffff80000000,%rdx
001b      ceb:  48 2b 15 00 00 00 00    sub    0x0(%rip),%rdx        # cf2 <hv_do_hypercall.constprop.0+0x22>   cee: R_X86_64_PC32      page_offset_base-0x4
0022      cf2:  48 01 f2                add    %rsi,%rdx
0025      cf5:  48 8b 35 00 00 00 00    mov    0x0(%rip),%rsi        # cfc <hv_do_hypercall.constprop.0+0x2c>   cf8: R_X86_64_PC32      hv_hypercall_pg-0x4
002c      cfc:  48 85 f6                test   %rsi,%rsi
002f      cff:  74 0f                   je     d10 <hv_do_hypercall.constprop.0+0x40>
0031      d01:  31 c0                   xor    %eax,%eax
0033      d03:  49 89 c0                mov    %rax,%r8
0036      d06:  ff d6                   call   *%rsi
0038      d08:  90                      nop
0039      d09:  90                      nop
003a      d0a:  90                      nop
003b      d0b:  e9 00 00 00 00          jmp    d10 <hv_do_hypercall.constprop.0+0x40>   d0c: R_X86_64_PLT32     __x86_return_thunk-0x4
0040      d10:  48 c7 c0 ff ff ff ff    mov    $0xffffffffffffffff,%rax
0047      d17:  e9 00 00 00 00          jmp    d1c <hv_do_hypercall.constprop.0+0x4c>   d18: R_X86_64_PLT32     __x86_return_thunk-0x4
004c      d1c:  48 8b 15 00 00 00 00    mov    0x0(%rip),%rdx        # d23 <hv_do_hypercall.constprop.0+0x53>   d1f: R_X86_64_PC32      phys_base-0x4
0053      d23:  eb cd                   jmp    cf2 <hv_do_hypercall.constprop.0+0x22>

$ objdump -wdr -j .altinstr_replacement defconfig-build/arch/x86/hyperv/mmu.o
0000000000000000 <.altinstr_replacement>:
0:   f3 48 0f b8 c7          popcnt %rdi,%rax
5:   e8 00 00 00 00          call   a <.altinstr_replacement+0xa>    6: R_X86_64_PLT32       __x86_indirect_thunk_rsi-0x4
a:   0f ae e8                lfence
d:   ff d6                   call   *%rsi
f:   0f 01 d9                vmmcall

$ ./readelf-section.sh defconfig-build/arch/x86/hyperv/mmu.o altinstructions
Relocation section '.rela.altinstructions' at offset 0x5420 contains 8 entries:
Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
0000000000000000  0000000200000002 R_X86_64_PC32          0000000000000000 .text + 1e3
0000000000000004  0000000700000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + 0
000000000000000e  0000000200000002 R_X86_64_PC32          0000000000000000 .text + d06
0000000000000012  0000000700000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + 5
000000000000001c  0000000200000002 R_X86_64_PC32          0000000000000000 .text + d06
0000000000000020  0000000700000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + a
000000000000002a  0000000200000002 R_X86_64_PC32          0000000000000000 .text + d06
000000000000002e  0000000700000002 R_X86_64_PC32          0000000000000000 .altinstr_replacement + f
  
Borislav Petkov June 27, 2023, 12:05 p.m. UTC | #6
On Tue, Jun 27, 2023 at 01:50:02PM +0200, Peter Zijlstra wrote:
> On Tue, Jun 27, 2023 at 06:57:28PM +0800, Tianyu Lan wrote:
> 
> > > "There is no x86 SEV SNP feature(X86_FEATURE_SEV_SNP) flag
> 
> I'm sure we can arrange such a feature if we need it, this isn't rocket
> science. Boris?

https://lore.kernel.org/r/20230612042559.375660-7-michael.roth@amd.com

> This seems to work; it's a bit magical for having a nested ALTERNATIVE
> but the output seems correct (the outer alternative comes last in
> .altinstructions and thus has precedence). Sure, the [thunk_target] input
> goes unused in one of the alternatives, but who cares.

I'd like to avoid the nested alternative if not really necessary. I.e.,
a static_call should work here too, no?
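
A rough sketch of that idea (all names here are hypothetical, not from
the thread): select the hypercall primitive once at init time, so the
hot path becomes a direct patched call instead of a per-call flag test:

	#include <linux/init.h>
	#include <linux/static_call.h>

	static u64 hv_hypercall_pg_call(u64 control, u64 input_addr, u64 output_addr);
	static u64 hv_hypercall_vmmcall(u64 control, u64 input_addr, u64 output_addr);

	/* Default to the hypercall-page variant. */
	DEFINE_STATIC_CALL(hv_hypercall, hv_hypercall_pg_call);

	/* Hypothetical init hook: retarget the call once for SEV-SNP guests. */
	void __init hv_hypercall_init(void)
	{
		if (hv_isolation_type_en_snp())
			static_call_update(hv_hypercall, hv_hypercall_vmmcall);
	}

	/* Callers: static_call(hv_hypercall)(control, input_address, output_address); */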
  
Peter Zijlstra June 27, 2023, 1:38 p.m. UTC | #7
On Tue, Jun 27, 2023 at 02:05:02PM +0200, Borislav Petkov wrote:
> On Tue, Jun 27, 2023 at 01:50:02PM +0200, Peter Zijlstra wrote:
> > On Tue, Jun 27, 2023 at 06:57:28PM +0800, Tianyu Lan wrote:
> > 
> > > > "There is no x86 SEV SNP feature(X86_FEATURE_SEV_SNP) flag
> > 
> > I'm sure we can arrange such a feature if we need it, this isn't rocket
> > science. Boris?
> 
> https://lore.kernel.org/r/20230612042559.375660-7-michael.roth@amd.com
> 
> > This seems to work; it's a bit magical for having a nested ALTERNATIVE
> > but the output seems correct (the outer alternative comes last in
> > .altinstructions and thus has precedence). Sure, the [thunk_target] input
> > goes unused in one of the alternatives, but who cares.
> 
> I'd like to avoid the nested alternative if not really necessary. I.e.,

I really don't see the problem with them; they work as expected.

We rely on .altinstruction order elsewhere as well.

That said, there is a tiny difference between:

ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2)

and

ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1),
	    newinst2, flag2)

and that is alt_instr::instrlen: when the inner alternative is the
smaller one, the outer alternative will add additional padding that the
inner (obviously) doesn't know about.

However that is easily fixed. See the patch below. Boots for
x86_64-defconfig. Look at how much gunk we can delete.

> a static_call should work here too, no?

static_call() loses the inline, but perhaps the function is too large
to get inlined anyway.


---
Subject: x86/alternative: Simplify ALTERNATIVE_n()

Instead of making increasingly complicated ALTERNATIVE_n()
implementations, use nested alternative expressions.

The only difference between:

  ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2)

and

  ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1),
              newinst2, flag2)

is that the outer alternative can add additional padding when the inner
alternative is the shorter one, which results in alt_instr::instrlen
being inconsistent.

However, this is easily remedied since the alt_instr entries will be
consecutive and it is trivial to compute the max(alt_instr::instrlen) at
runtime while patching.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/alternative.h | 60 +++++---------------------------------
 arch/x86/kernel/alternative.c      |  7 ++++-
 2 files changed, 13 insertions(+), 54 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index d7da28fada87..16a98dd42ce0 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -171,36 +171,6 @@ static inline int alternatives_text_reserved(void *start, void *end)
 		"((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"		\
 	alt_end_marker ":\n"
 
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
-
-/*
- * Pad the second replacement alternative with additional NOPs if it is
- * additionally longer than the first replacement alternative.
- */
-#define OLDINSTR_2(oldinstr, num1, num2) \
-	"# ALT: oldinstr2\n"									\
-	"661:\n\t" oldinstr "\n662:\n"								\
-	"# ALT: padding2\n"									\
-	".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * "	\
-		"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n"	\
-	alt_end_marker ":\n"
-
-#define OLDINSTR_3(oldinsn, n1, n2, n3)								\
-	"# ALT: oldinstr3\n"									\
-	"661:\n\t" oldinsn "\n662:\n"								\
-	"# ALT: padding3\n"									\
-	".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3))	\
-		" - (" alt_slen ")) > 0) * "							\
-		"(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3))	\
-		" - (" alt_slen ")), 0x90\n"							\
-	alt_end_marker ":\n"
-
 #define ALTINSTR_ENTRY(ft_flags, num)					      \
 	" .long 661b - .\n"				/* label           */ \
 	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
@@ -222,35 +192,19 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	ALTINSTR_REPLACEMENT(newinstr, 1)				\
 	".popsection\n"
 
-#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
-	OLDINSTR_2(oldinstr, 1, 2)					\
-	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY(ft_flags1, 1)					\
-	ALTINSTR_ENTRY(ft_flags2, 2)					\
-	".popsection\n"							\
-	".pushsection .altinstr_replacement, \"ax\"\n"			\
-	ALTINSTR_REPLACEMENT(newinstr1, 1)				\
-	ALTINSTR_REPLACEMENT(newinstr2, 2)				\
-	".popsection\n"
+#define ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2)	\
+	ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1),		\
+		    newinst2, flag2)
 
 /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
 #define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
 	ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS,	\
 		      newinstr_yes, ft_flags)
 
-#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \
-			newinsn3, ft_flags3)				\
-	OLDINSTR_3(oldinsn, 1, 2, 3)					\
-	".pushsection .altinstructions,\"a\"\n"				\
-	ALTINSTR_ENTRY(ft_flags1, 1)					\
-	ALTINSTR_ENTRY(ft_flags2, 2)					\
-	ALTINSTR_ENTRY(ft_flags3, 3)					\
-	".popsection\n"							\
-	".pushsection .altinstr_replacement, \"ax\"\n"			\
-	ALTINSTR_REPLACEMENT(newinsn1, 1)				\
-	ALTINSTR_REPLACEMENT(newinsn2, 2)				\
-	ALTINSTR_REPLACEMENT(newinsn3, 3)				\
-	".popsection\n"
+#define ALTERNATIVE_3(oldinst, newinst1, flag1, newinst2, flag2,	\
+		      newinst3, flag3)					\
+	ALTERNATIVE(ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2), \
+		    newinst3, flag3)
 
 /*
  * Alternative instructions for different CPU types or capabilities.
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index a7e1ec50ad29..f0e34e6f01d4 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -398,7 +398,7 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 						  struct alt_instr *end)
 {
-	struct alt_instr *a;
+	struct alt_instr *a, *b;
 	u8 *instr, *replacement;
 	u8 insn_buff[MAX_PATCH_LEN];
 
@@ -415,6 +415,11 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 	for (a = start; a < end; a++) {
 		int insn_buff_sz = 0;
 
+		for (b = a+1; b < end && b->instr_offset == a->instr_offset; b++) {
+			u8 len = max(a->instrlen, b->instrlen);
+			a->instrlen = b->instrlen = len;
+		}
+
 		instr = (u8 *)&a->instr_offset + a->instr_offset;
 		replacement = (u8 *)&a->repl_offset + a->repl_offset;
 		BUG_ON(a->instrlen > sizeof(insn_buff));
  
Peter Zijlstra June 28, 2023, 10:53 a.m. UTC | #8
On Tue, Jun 27, 2023 at 03:38:35PM +0200, Peter Zijlstra wrote:

> That said, there is a tiny difference between:
> 
> ALTERNATIVE_2(oldinst, newinst1, flag1, newinst2, flag2)
> 
> and
> 
> ALTERNATIVE(ALTERNATIVE(oldinst, newinst1, flag1),
> 	    newinst2, flag2)
> 
> and that is alt_instr::instrlen: when the inner alternative is the
> smaller one, the outer alternative will add additional padding that the
> inner (obviously) doesn't know about.

New version:

https://lkml.kernel.org/r/20230628104952.GA2439977%40hirez.programming.kicks-ass.net
  

Patch

diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 31c476f4e656..d859d7c5f5e8 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -61,16 +61,25 @@  static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
-	if (!hv_hypercall_pg)
-		return U64_MAX;
+	if (hv_isolation_type_en_snp()) {
+		__asm__ __volatile__("mov %4, %%r8\n"
+				     "vmmcall"
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+				       "+c" (control), "+d" (input_address)
+				     :  "r" (output_address)
+				     : "cc", "memory", "r8", "r9", "r10", "r11");
+	} else {
+		if (!hv_hypercall_pg)
+			return U64_MAX;
 
-	__asm__ __volatile__("mov %4, %%r8\n"
-			     CALL_NOSPEC
-			     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
-			       "+c" (control), "+d" (input_address)
-			     :  "r" (output_address),
-				THUNK_TARGET(hv_hypercall_pg)
-			     : "cc", "memory", "r8", "r9", "r10", "r11");
+		__asm__ __volatile__("mov %4, %%r8\n"
+				     CALL_NOSPEC
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+				       "+c" (control), "+d" (input_address)
+				     :  "r" (output_address),
+					THUNK_TARGET(hv_hypercall_pg)
+				     : "cc", "memory", "r8", "r9", "r10", "r11");
+	}
 #else
 	u32 input_address_hi = upper_32_bits(input_address);
 	u32 input_address_lo = lower_32_bits(input_address);
@@ -104,7 +113,13 @@  static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
-	{
+	if (hv_isolation_type_en_snp()) {
+		__asm__ __volatile__(
+				"vmmcall"
+				: "=a" (hv_status), ASM_CALL_CONSTRAINT,
+				"+c" (control), "+d" (input1)
+				:: "cc", "r8", "r9", "r10", "r11");
+	} else {
 		__asm__ __volatile__(CALL_NOSPEC
 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
 				       "+c" (control), "+d" (input1)
@@ -149,7 +164,14 @@  static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
 	u64 hv_status;
 
 #ifdef CONFIG_X86_64
-	{
+	if (hv_isolation_type_en_snp()) {
+		__asm__ __volatile__("mov %4, %%r8\n"
+				     "vmmcall"
+				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,
+				       "+c" (control), "+d" (input1)
+				     : "r" (input2)
+				     : "cc", "r8", "r9", "r10", "r11");
+	} else {
 		__asm__ __volatile__("mov %4, %%r8\n"
 				     CALL_NOSPEC
 				     : "=a" (hv_status), ASM_CALL_CONSTRAINT,