[kernel,1/3] x86/amd/dr_addr_mask: Cache values in percpu variables

Message ID 20221201021948.9259-2-aik@amd.com
State New
Headers
Series KVM: SEV: Enable AMD SEV-ES DebugSwap |

Commit Message

Alexey Kardashevskiy Dec. 1, 2022, 2:19 a.m. UTC
  Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and
KVM needs to store these before switching to a guest; the DebugSwitch
hardware support restores them as type B swap.

This stores MSR values from set_dr_addr_mask() in percpu values and
returns them via new get_dr_addr_mask(). The gain here is about 10x.

Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
---
 arch/x86/include/asm/debugreg.h |  1 +
 arch/x86/kernel/cpu/amd.c       | 32 ++++++++++++++++++++
 2 files changed, 33 insertions(+)
  

Comments

Sean Christopherson Dec. 1, 2022, 4:58 p.m. UTC | #1
On Thu, Dec 01, 2022, Alexey Kardashevskiy wrote:
> Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
> be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and
> KVM needs to store these before switching to a guest; the DebugSwitch
> hardware support restores them as type B swap.
>
> This stores MSR values from set_dr_addr_mask() in percpu values and
> returns them via new get_dr_addr_mask(). The gain here is about 10x.
>
> Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
> ---
>  arch/x86/include/asm/debugreg.h |  1 +
>  arch/x86/kernel/cpu/amd.c       | 32 ++++++++++++++++++++
>  2 files changed, 33 insertions(+)
> 
> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
> index cfdf307ddc01..c4324d0205b5 100644
> --- a/arch/x86/include/asm/debugreg.h
> +++ b/arch/x86/include/asm/debugreg.h
> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>  
>  #ifdef CONFIG_CPU_SUP_AMD
>  extern void set_dr_addr_mask(unsigned long mask, int dr);
> +extern unsigned long get_dr_addr_mask(int dr);
>  #else
>  static inline void set_dr_addr_mask(unsigned long mask, int dr) { }

KVM_AMD doesn't depend on CPU_SUP_AMD, i.e. this needs a stub.  Or we need to add
a dependency.

> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index c75d75b9f11a..ec7efcef4e14 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
>  	return false;
>  }
>  
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);
> +
>  void set_dr_addr_mask(unsigned long mask, int dr)
>  {
>  	if (!boot_cpu_has(X86_FEATURE_BPEXT))
> @@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
>  	switch (dr) {
>  	case 0:
>  		wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);

LOL, I'd love to hear how MSR_F16H_DR0_ADDR_MASK ended up with a completely
different MSR index.

> +		per_cpu(dr0_addr_mask, smp_processor_id()) = mask;

Use an array to avoid the copy+paste?  And if you're going to add a cache, might
as well use it to avoid unnecessary writes.

>  		break;
>  	case 1:
  
Alexey Kardashevskiy Dec. 6, 2022, 7:14 a.m. UTC | #2
On 2/12/22 03:58, Sean Christopherson wrote:
> On Thu, Dec 01, 2022, Alexey Kardashevskiy wrote:
>> Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
>> be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and
>> KVM needs to store these before switching to a guest; the DebugSwitch
>> hardware support restores them as type B swap.
>>
>> This stores MSR values from set_dr_addr_mask() in percpu values and
>> returns them via new get_dr_addr_mask(). The gain here is about 10x.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
>> ---
>>   arch/x86/include/asm/debugreg.h |  1 +
>>   arch/x86/kernel/cpu/amd.c       | 32 ++++++++++++++++++++
>>   2 files changed, 33 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
>> index cfdf307ddc01..c4324d0205b5 100644
>> --- a/arch/x86/include/asm/debugreg.h
>> +++ b/arch/x86/include/asm/debugreg.h
>> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>>   
>>   #ifdef CONFIG_CPU_SUP_AMD
>>   extern void set_dr_addr_mask(unsigned long mask, int dr);
>> +extern unsigned long get_dr_addr_mask(int dr);
>>   #else
>>   static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
> 
> KVM_AMD doesn't depend on CPU_SUP_AMD, i.e. this needs a stub.  Or we need to add
> a dependency.

The new symbol is under CONFIG_CPU_SUP_AMD and so is its only user 
arch/x86/kernel/cpu/amd.c:

arch/x86/kernel/cpu/Makefile:
obj-$(CONFIG_CPU_SUP_AMD)               += amd.o


Is this enough dependency or we need something else? (if enough, I'll 
put it into the next commit log).


>> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
>> index c75d75b9f11a..ec7efcef4e14 100644
>> --- a/arch/x86/kernel/cpu/amd.c
>> +++ b/arch/x86/kernel/cpu/amd.c
>> @@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
>>   	return false;
>>   }
>>   
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);
>> +
>>   void set_dr_addr_mask(unsigned long mask, int dr)
>>   {
>>   	if (!boot_cpu_has(X86_FEATURE_BPEXT))
>> @@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
>>   	switch (dr) {
>>   	case 0:
>>   		wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
> 
> LOL, I'd love to hear how MSR_F16H_DR0_ADDR_MASK ended up with a completely
> different MSR index.
>> +		per_cpu(dr0_addr_mask, smp_processor_id()) = mask;
> 
> Use an array to avoid the copy+paste?  And if you're going to add a cache, might
> as well use it to avoid unnecessary writes.

I'll do this, I did not realize DEFINE_PER_CPU_READ_MOSTLY can do 
arrays. Thanks,

> 
>>   		break;
>>   	case 1:
  
Sean Christopherson Dec. 6, 2022, 5:07 p.m. UTC | #3
On Tue, Dec 06, 2022, Alexey Kardashevskiy wrote:
> 
> On 2/12/22 03:58, Sean Christopherson wrote:
> > On Thu, Dec 01, 2022, Alexey Kardashevskiy wrote:
> > > diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
> > > index cfdf307ddc01..c4324d0205b5 100644
> > > --- a/arch/x86/include/asm/debugreg.h
> > > +++ b/arch/x86/include/asm/debugreg.h
> > > @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
> > >   #ifdef CONFIG_CPU_SUP_AMD
> > >   extern void set_dr_addr_mask(unsigned long mask, int dr);
> > > +extern unsigned long get_dr_addr_mask(int dr);
> > >   #else
> > >   static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
> > 
> > KVM_AMD doesn't depend on CPU_SUP_AMD, i.e. this needs a stub.  Or we need to add
> > a dependency.
> 
> The new symbol is under CONFIG_CPU_SUP_AMD and so is its only user
> arch/x86/kernel/cpu/amd.c:
> 
> arch/x86/kernel/cpu/Makefile:
> obj-$(CONFIG_CPU_SUP_AMD)               += amd.o
> 
> 
> Is this enough dependency or we need something else? (if enough, I'll put it
> into the next commit log).

No, it's actually the opposite, the issue is precisely that the symbol is buried
under CPU_SUP_AMD.  KVM_AMD doesn't currently depend on CPU_SUP_AMD, and so the
usage in sev_es_prepare_switch_to_guest() fails to compile if CPU_SUP_AMD=n and
KVM_AMD={Y,M}.

  config KVM_AMD
	tristate "KVM for AMD processors support"
	depends on KVM

I actually just submitted a patch[*] to "fix" that since KVM requires the CPU vendor
to be AMD or Hygon at runtime.  Although that patch is buried in the middle of a
large series, it doesn't have any dependencies.  So, if this series is routed through
the KVM tree, it should be straightforward to just pull that patch into this series,
and whichever series lands first gets the honor of applying that patch.

If this series is routed through the tip tree, the best option may be to just add
a stub to avoid potential conflicts, and then we can rip the stub out later.

[*] https://lore.kernel.org/all/20221201232655.290720-12-seanjc@google.com
  
Alexey Kardashevskiy Dec. 7, 2022, 12:50 a.m. UTC | #4
On 7/12/22 04:07, Sean Christopherson wrote:
> On Tue, Dec 06, 2022, Alexey Kardashevskiy wrote:
>>
>> On 2/12/22 03:58, Sean Christopherson wrote:
>>> On Thu, Dec 01, 2022, Alexey Kardashevskiy wrote:
>>>> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
>>>> index cfdf307ddc01..c4324d0205b5 100644
>>>> --- a/arch/x86/include/asm/debugreg.h
>>>> +++ b/arch/x86/include/asm/debugreg.h
>>>> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>>>>    #ifdef CONFIG_CPU_SUP_AMD
>>>>    extern void set_dr_addr_mask(unsigned long mask, int dr);
>>>> +extern unsigned long get_dr_addr_mask(int dr);
>>>>    #else
>>>>    static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
>>>
>>> KVM_AMD doesn't depend on CPU_SUP_AMD, i.e. this needs a stub.  Or we need to add
>>> a dependency.
>>
>> The new symbol is under CONFIG_CPU_SUP_AMD and so is its only user
>> arch/x86/kernel/cpu/amd.c:
>>
>> arch/x86/kernel/cpu/Makefile:
>> obj-$(CONFIG_CPU_SUP_AMD)               += amd.o
>>
>>
>> Is this enough dependency or we need something else? (if enough, I'll put it
>> into the next commit log).
> 
> No, it's actually the opposite, the issue is precisely that the symbol is buried
> under CPU_SUP_AMD.  KVM_AMD doesn't currently depend on CPU_SUP_AMD, and so the
> usage in sev_es_prepare_switch_to_guest() fails to compile if CPU_SUP_AMD=n and
> KVM_AMD={Y,M}.

Ouch, you are one step ahead, 2/3 fails, not this one. My bad. I'll add 
a stub anyway.

btw I want to do "s/int dr/unsigned int dr/" since I am using an array 
now. Does it have to be patch#1 "fix set_dr_addr_mask" and then patch#2 
"add get_dr_addr_mask" or one patch will do? Thanks,


> 
>    config KVM_AMD
> 	tristate "KVM for AMD processors support"
> 	depends on KVM
> 
> I actually just submitted a patch[*] to "fix" that since KVM requires the CPU vendor
> to be AMD or Hygon at runtime.

>  Although that patch is buried in the middle of a
> large series, it doesn't have any dependencies.  So, if this series is routed through
> the KVM tree, it should be straightforward to just pull that patch into this series,
> and whichever series lands first gets the honor of applying that patch.
> 
> If this series is routed through the tip tree, the best option may be to just add
> a stub to avoid potential conflicts, and then we can rip the stub out later.
> 
> [*] https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flore.kernel.org%2Fall%2F20221201232655.290720-12-seanjc%40google.com&amp;data=05%7C01%7CAlexey.Kardashevskiy%40amd.com%7C6ee92cb78f8f446b887908dad7ac6fca%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638059432896741545%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=25f18IS6z9HEf0Qtt%2BFDxZdtbTVPg%2FVulsFGhWLH0Rg%3D&amp;reserved=0
  
Borislav Petkov Dec. 7, 2022, 6:55 p.m. UTC | #5
On Thu, Dec 01, 2022 at 01:19:46PM +1100, Alexey Kardashevskiy wrote:
> Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables

"x86/amd: " is perfectly fine as a prefix.

> Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
> be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and

which does what? I.e., a sort of lazy DR regs swapping...

> KVM needs to store these before switching to a guest; the DebugSwitch
> hardware support restores them as type B swap.

I know this is all clear to you but you should explain what type B
register swap is.

> This stores MSR values from set_dr_addr_mask() in percpu values and

s/This stores/Store/

From Documentation/process/submitting-patches.rst:

 "Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
  instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
  to do frotz", as if you are giving orders to the codebase to change
  its behaviour."

Also, do not talk about what your patch does - that should hopefully be
visible in the diff itself. Rather, talk about *why* you're doing what
you're doing.

> returns them via new get_dr_addr_mask(). The gain here is about 10x.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
> ---
>  arch/x86/include/asm/debugreg.h |  1 +
>  arch/x86/kernel/cpu/amd.c       | 32 ++++++++++++++++++++
>  2 files changed, 33 insertions(+)
> 
> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
> index cfdf307ddc01..c4324d0205b5 100644
> --- a/arch/x86/include/asm/debugreg.h
> +++ b/arch/x86/include/asm/debugreg.h
> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>  
>  #ifdef CONFIG_CPU_SUP_AMD
>  extern void set_dr_addr_mask(unsigned long mask, int dr);
> +extern unsigned long get_dr_addr_mask(int dr);
>  #else
>  static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
>  #endif
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index c75d75b9f11a..ec7efcef4e14 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
>  	return false;
>  }
>  
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);

This BPEXT thing is AMD-only, right?

I guess those should be called amd_drX_addr_mask where X in [0-3].

Yeah yeah, they are used in AMD-only code - svm* - but still.

>  void set_dr_addr_mask(unsigned long mask, int dr)
>  {
>  	if (!boot_cpu_has(X86_FEATURE_BPEXT))
> @@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
>  	switch (dr) {
>  	case 0:
>  		wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
> +		per_cpu(dr0_addr_mask, smp_processor_id()) = mask;
>  		break;
>  	case 1:
> +		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
> +		per_cpu(dr1_addr_mask, smp_processor_id()) = mask;
> +		break;
>  	case 2:
> +		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
> +		per_cpu(dr2_addr_mask, smp_processor_id()) = mask;
> +		break;
>  	case 3:
>  		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
> +		per_cpu(dr3_addr_mask, smp_processor_id()) = mask;
>  		break;
>  	default:
>  		break;
>  	}
>  }
>  
> +unsigned long get_dr_addr_mask(int dr)

This function name is too generic for an exported function.

amd_get_dr_addr_mask() I'd say.

> +	if (!boot_cpu_has(X86_FEATURE_BPEXT))

check_for_deprecated_apis: WARNING: arch/x86/kernel/cpu/amd.c:1195: Do not use boot_cpu_has() - use cpu_feature_enabled() instead.

You could fix the above one too, while at it.

> +		return 0;
> +
> +	switch (dr) {
> +	case 0:
> +		return per_cpu(dr0_addr_mask, smp_processor_id());
> +	case 1:
> +		return per_cpu(dr1_addr_mask, smp_processor_id());
> +	case 2:
> +		return per_cpu(dr2_addr_mask, smp_processor_id());
> +	case 3:
> +		return per_cpu(dr3_addr_mask, smp_processor_id());

	default:	
		WARN_ON_ONCE(1);
		break;

Just in case.

And as a matter of fact, make that short and succinct:

        switch (dr) {
        case 0: return per_cpu(dr0_addr_mask, smp_processor_id());
        case 1: return per_cpu(dr1_addr_mask, smp_processor_id());
        case 2: return per_cpu(dr2_addr_mask, smp_processor_id());
        case 3: return per_cpu(dr3_addr_mask, smp_processor_id());
        default: WARN_ON_ONCE(1); break;
        }

Thx.
  
Alexey Kardashevskiy Dec. 8, 2022, 6:11 a.m. UTC | #6
On 8/12/22 05:55, Borislav Petkov wrote:
> On Thu, Dec 01, 2022 at 01:19:46PM +1100, Alexey Kardashevskiy wrote:
>> Subject: Re: [PATCH kernel 1/3] x86/amd/dr_addr_mask: Cache values in percpu variables
> 
> "x86/amd: " is perfectly fine as a prefix.
> 
>> Reading DR[0-3]_ADDR_MASK MSRs takes about 250 cycles which is going to
>> be noticeable when the AMD KVM SEV-ES's DebugSwap feature is enabled and
> 
> which does what? I.e., a sort of lazy DR regs swapping...
> 
>> KVM needs to store these before switching to a guest; the DebugSwitch
>> hardware support restores them as type B swap.
> 
> I know this is all clear to you but you should explain what type B
> register swap is.
> 
>> This stores MSR values from set_dr_addr_mask() in percpu values and
> 
> s/This stores/Store/
> 
>  From Documentation/process/submitting-patches.rst:
> 
>   "Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
>    instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
>    to do frotz", as if you are giving orders to the codebase to change
>    its behaviour."
> 
> Also, do not talk about what your patch does - that should hopefully be
> visible in the diff itself. Rather, talk about *why* you're doing what
> you're doing.
> 
>> returns them via new get_dr_addr_mask(). The gain here is about 10x.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@amd.com>
>> ---
>>   arch/x86/include/asm/debugreg.h |  1 +
>>   arch/x86/kernel/cpu/amd.c       | 32 ++++++++++++++++++++
>>   2 files changed, 33 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
>> index cfdf307ddc01..c4324d0205b5 100644
>> --- a/arch/x86/include/asm/debugreg.h
>> +++ b/arch/x86/include/asm/debugreg.h
>> @@ -127,6 +127,7 @@ static __always_inline void local_db_restore(unsigned long dr7)
>>   
>>   #ifdef CONFIG_CPU_SUP_AMD
>>   extern void set_dr_addr_mask(unsigned long mask, int dr);
>> +extern unsigned long get_dr_addr_mask(int dr);
>>   #else
>>   static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
>>   #endif
>> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
>> index c75d75b9f11a..ec7efcef4e14 100644
>> --- a/arch/x86/kernel/cpu/amd.c
>> +++ b/arch/x86/kernel/cpu/amd.c
>> @@ -1158,6 +1158,11 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
>>   	return false;
>>   }
>>   
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
>> +DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);
> 
> This BPEXT thing is AMD-only, right?
> 
> I guess those should be called amd_drX_addr_mask where X in [0-3].
> 
> Yeah yeah, they are used in AMD-only code - svm* - but still.
> 
>>   void set_dr_addr_mask(unsigned long mask, int dr)
>>   {
>>   	if (!boot_cpu_has(X86_FEATURE_BPEXT))
>> @@ -1166,17 +1171,44 @@ void set_dr_addr_mask(unsigned long mask, int dr)
>>   	switch (dr) {
>>   	case 0:
>>   		wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
>> +		per_cpu(dr0_addr_mask, smp_processor_id()) = mask;
>>   		break;
>>   	case 1:
>> +		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
>> +		per_cpu(dr1_addr_mask, smp_processor_id()) = mask;
>> +		break;
>>   	case 2:
>> +		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
>> +		per_cpu(dr2_addr_mask, smp_processor_id()) = mask;
>> +		break;
>>   	case 3:
>>   		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
>> +		per_cpu(dr3_addr_mask, smp_processor_id()) = mask;
>>   		break;
>>   	default:
>>   		break;
>>   	}
>>   }
>>   
>> +unsigned long get_dr_addr_mask(int dr)
> 
> This function name is too generic for an exported function.
> 
> amd_get_dr_addr_mask() I'd say.
> 
>> +	if (!boot_cpu_has(X86_FEATURE_BPEXT))
> 
> check_for_deprecated_apis: WARNING: arch/x86/kernel/cpu/amd.c:1195: Do not use boot_cpu_has() - use cpu_feature_enabled() instead.
> 
> You could fix the above one too, while at it.
> 
>> +		return 0;
>> +
>> +	switch (dr) {
>> +	case 0:
>> +		return per_cpu(dr0_addr_mask, smp_processor_id());
>> +	case 1:
>> +		return per_cpu(dr1_addr_mask, smp_processor_id());
>> +	case 2:
>> +		return per_cpu(dr2_addr_mask, smp_processor_id());
>> +	case 3:
>> +		return per_cpu(dr3_addr_mask, smp_processor_id());
> 
> 	default:	
> 		WARN_ON_ONCE(1);
> 		break;
> 
> Just in case.
> 
> And as a matter of fact, make that short and succinct:
> 
>          switch (dr) {
>          case 0: return per_cpu(dr0_addr_mask, smp_processor_id());
>          case 1: return per_cpu(dr1_addr_mask, smp_processor_id());
>          case 2: return per_cpu(dr2_addr_mask, smp_processor_id());
>          case 3: return per_cpu(dr3_addr_mask, smp_processor_id());
>          default: WARN_ON_ONCE(1); break;
>          }


Not an array, as Sean suggested? Uff...


> 
> Thx.
>
  
Borislav Petkov Dec. 8, 2022, 10:35 a.m. UTC | #7
On Thu, Dec 08, 2022 at 05:11:26PM +1100, Alexey Kardashevskiy wrote:
> Not an array, as Sean suggested? Uff...

Sure an array if it makes the code even more readable and clean. With an
array it should become even more compact, I'd say.

Thx.
  

Patch

diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index cfdf307ddc01..c4324d0205b5 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -127,6 +127,7 @@  static __always_inline void local_db_restore(unsigned long dr7)
 
 #ifdef CONFIG_CPU_SUP_AMD
 extern void set_dr_addr_mask(unsigned long mask, int dr);
+extern unsigned long get_dr_addr_mask(int dr);
 #else
 static inline void set_dr_addr_mask(unsigned long mask, int dr) { }
 #endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c75d75b9f11a..ec7efcef4e14 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1158,6 +1158,11 @@  static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
 	return false;
 }
 
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr0_addr_mask);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr1_addr_mask);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr2_addr_mask);
+DEFINE_PER_CPU_READ_MOSTLY(unsigned long, dr3_addr_mask);
+
 void set_dr_addr_mask(unsigned long mask, int dr)
 {
 	if (!boot_cpu_has(X86_FEATURE_BPEXT))
@@ -1166,17 +1171,44 @@  void set_dr_addr_mask(unsigned long mask, int dr)
 	switch (dr) {
 	case 0:
 		wrmsr(MSR_F16H_DR0_ADDR_MASK, mask, 0);
+		per_cpu(dr0_addr_mask, smp_processor_id()) = mask;
 		break;
 	case 1:
+		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+		per_cpu(dr1_addr_mask, smp_processor_id()) = mask;
+		break;
 	case 2:
+		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+		per_cpu(dr2_addr_mask, smp_processor_id()) = mask;
+		break;
 	case 3:
 		wrmsr(MSR_F16H_DR1_ADDR_MASK - 1 + dr, mask, 0);
+		per_cpu(dr3_addr_mask, smp_processor_id()) = mask;
 		break;
 	default:
 		break;
 	}
 }
 
+unsigned long get_dr_addr_mask(int dr)
+{
+	if (!boot_cpu_has(X86_FEATURE_BPEXT))
+		return 0;
+
+	switch (dr) {
+	case 0:
+		return per_cpu(dr0_addr_mask, smp_processor_id());
+	case 1:
+		return per_cpu(dr1_addr_mask, smp_processor_id());
+	case 2:
+		return per_cpu(dr2_addr_mask, smp_processor_id());
+	case 3:
+		return per_cpu(dr3_addr_mask, smp_processor_id());
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(get_dr_addr_mask);
+
 u32 amd_get_highest_perf(void)
 {
 	struct cpuinfo_x86 *c = &boot_cpu_data;