[RFC] KVM: arm64: Override Microsoft Azure Cobalt 100 MIDR value with ARM Neoverse N2

Message ID 20240206195819.1146693-1-eahariha@linux.microsoft.com
State New
Headers
Series [RFC] KVM: arm64: Override Microsoft Azure Cobalt 100 MIDR value with ARM Neoverse N2 |

Commit Message

Easwar Hariharan Feb. 6, 2024, 7:58 p.m. UTC
  Several workload optimizations and errata depend on validating that the
optimization or errata are applicable to the particular CPU by checking
the MIDR_EL1 system register value. With the Microsoft implementer ID
for Azure Cobalt 100, the value doesn't match and ~20-25% performance
regression is seen in these workloads. Override the Azure Cobalt 100
value and replace it with the default ARM Neoverse N2 value that Azure
Cobalt 100 is based on.

Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
---
 arch/arm64/include/asm/cputype.h   | 3 ++-
 arch/arm64/include/asm/el2_setup.h | 5 +++++
 arch/arm64/kvm/sys_regs.c          | 9 ++++++++-
 3 files changed, 15 insertions(+), 2 deletions(-)
  

Comments

Anshuman Khandual Feb. 7, 2024, 7:54 a.m. UTC | #1
On 2/7/24 01:28, Easwar Hariharan wrote:
> Several workload optimizations and errata depend on validating that the
> optimization or errata are applicable to the particular CPU by checking
> the MIDR_EL1 system register value. With the Microsoft implementer ID

Which is how it should be done.

> for Azure Cobalt 100, the value doesn't match and ~20-25% performance
> regression is seen in these workloads. Override the Azure Cobalt 100
> value and replace it with the default ARM Neoverse N2 value that Azure
> Cobalt 100 is based on.

Why cannot these MIDR values be classified as required and subscribed to
the existing erratas that is affecting such implementations. Hence these
work arounds will be triggered as and when applicable. Why then override
MIDR value instead ?

> 
> Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
> ---
>  arch/arm64/include/asm/cputype.h   | 3 ++-
>  arch/arm64/include/asm/el2_setup.h | 5 +++++
>  arch/arm64/kvm/sys_regs.c          | 9 ++++++++-
>  3 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 7c7493cb571f..0450c6c32377 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -262,7 +262,8 @@ is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
>   */
>  static inline u32 __attribute_const__ read_cpuid_id(void)
>  {
> -	return read_cpuid(MIDR_EL1);
> +	return (read_cpuid(MIDR_EL1) == 0x6D0FD490 ? 0x410FD490 :
> +			read_cpuid(MIDR_EL1));
>  }
>  
>  static inline u64 __attribute_const__ read_cpuid_mpidr(void)
> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
> index b7afaa026842..502a14e54a31 100644
> --- a/arch/arm64/include/asm/el2_setup.h
> +++ b/arch/arm64/include/asm/el2_setup.h
> @@ -138,6 +138,11 @@
>  .macro __init_el2_nvhe_idregs
>  	mrs	x0, midr_el1
>  	mrs	x1, mpidr_el1
> +	ldr	x2, =0x6D0FD490
> +	cmp	x0, x2
> +	bne	.Loverride_cobalt100_\@
> +	ldr	x0, =0x410FD490
> +.Loverride_cobalt100_\@:
>  	msr	vpidr_el2, x0
>  	msr	vmpidr_el2, x1
>  .endm
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 30253bd19917..8ea9c7fdabdb 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -3574,7 +3574,14 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
>  		return ((struct sys_reg_desc *)r)->val;			\
>  	}
>  
> -FUNCTION_INVARIANT(midr_el1)
> +static u64 get_midr_el1(struct kvm_vcpu *v, const struct sys_reg_desc *r)
> +{
> +	((struct sys_reg_desc *)r)->val = read_sysreg(midr_el1);
> +	if (((struct sys_reg_desc *)r)->val == 0x6D0FD490)
> +		((struct sys_reg_desc *)r)->val == 0x410FD490;
> +	return ((struct sys_reg_desc *)r)->val;
> +}
> +
>  FUNCTION_INVARIANT(revidr_el1)
>  FUNCTION_INVARIANT(aidr_el1)
>
  
Mark Rutland Feb. 7, 2024, 9:49 a.m. UTC | #2
On Tue, Feb 06, 2024 at 07:58:16PM +0000, Easwar Hariharan wrote:
> Several workload optimizations and errata depend on validating that the
> optimization or errata are applicable to the particular CPU by checking
> the MIDR_EL1 system register value. With the Microsoft implementer ID
> for Azure Cobalt 100, the value doesn't match and ~20-25% performance
> regression is seen in these workloads. Override the Azure Cobalt 100
> value and replace it with the default ARM Neoverse N2 value that Azure
> Cobalt 100 is based on.

NAK to rewriting the MIDR in the kernel; we do not lie to userspace about the
MIDR, and this is not a can of worms we're going to open.

If you desire some microarchitectural performance optimizations in particular
projects, please submit patches to those projects to understand your MIDR
value.

Further, if Azure Cobalt 100 is based on ARM Neoverse N2, you presumably suffer
from the same errata; can you comment on that at all? e.g. are there any
changes in this part that *might* lead to differences in errata and/or
workarounds? How do the MIDR_EL1.{Variant,Revision} values compare to that of
Neoverse N2?

Mark.

> Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
> ---
>  arch/arm64/include/asm/cputype.h   | 3 ++-
>  arch/arm64/include/asm/el2_setup.h | 5 +++++
>  arch/arm64/kvm/sys_regs.c          | 9 ++++++++-
>  3 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 7c7493cb571f..0450c6c32377 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -262,7 +262,8 @@ is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
>   */
>  static inline u32 __attribute_const__ read_cpuid_id(void)
>  {
> -	return read_cpuid(MIDR_EL1);
> +	return (read_cpuid(MIDR_EL1) == 0x6D0FD490 ? 0x410FD490 :
> +			read_cpuid(MIDR_EL1));
>  }
>  
>  static inline u64 __attribute_const__ read_cpuid_mpidr(void)
> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
> index b7afaa026842..502a14e54a31 100644
> --- a/arch/arm64/include/asm/el2_setup.h
> +++ b/arch/arm64/include/asm/el2_setup.h
> @@ -138,6 +138,11 @@
>  .macro __init_el2_nvhe_idregs
>  	mrs	x0, midr_el1
>  	mrs	x1, mpidr_el1
> +	ldr	x2, =0x6D0FD490
> +	cmp	x0, x2
> +	bne	.Loverride_cobalt100_\@
> +	ldr	x0, =0x410FD490
> +.Loverride_cobalt100_\@:
>  	msr	vpidr_el2, x0
>  	msr	vmpidr_el2, x1
>  .endm
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 30253bd19917..8ea9c7fdabdb 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -3574,7 +3574,14 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
>  		return ((struct sys_reg_desc *)r)->val;			\
>  	}
>  
> -FUNCTION_INVARIANT(midr_el1)
> +static u64 get_midr_el1(struct kvm_vcpu *v, const struct sys_reg_desc *r)
> +{
> +	((struct sys_reg_desc *)r)->val = read_sysreg(midr_el1);
> +	if (((struct sys_reg_desc *)r)->val == 0x6D0FD490)
> +		((struct sys_reg_desc *)r)->val == 0x410FD490;
> +	return ((struct sys_reg_desc *)r)->val;
> +}
> +
>  FUNCTION_INVARIANT(revidr_el1)
>  FUNCTION_INVARIANT(aidr_el1)
>  
> -- 
> 2.34.1
> 
>
  
Marc Zyngier Feb. 7, 2024, 9:50 a.m. UTC | #3
On Tue, 06 Feb 2024 19:58:16 +0000,
Easwar Hariharan <eahariha@linux.microsoft.com> wrote:
> 
> Several workload optimizations and errata depend on validating that the
> optimization or errata are applicable to the particular CPU by checking
> the MIDR_EL1 system register value. With the Microsoft implementer ID
> for Azure Cobalt 100, the value doesn't match and ~20-25% performance
> regression is seen in these workloads. Override the Azure Cobalt 100
> value and replace it with the default ARM Neoverse N2 value that Azure
> Cobalt 100 is based on.

Since you don't disclose *why* this particular value should have any
impact on the behaviour of the kernel, the answer should be "Thanks,
but no, thanks".

Whatever the reason is for doing so, you should make it plain what you
are working around. Blindly overriding ID registers is not an option,
and you should simply add your MIDR value to whatever errata list that
actually matches your implementation.

> 
> Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
> ---
>  arch/arm64/include/asm/cputype.h   | 3 ++-
>  arch/arm64/include/asm/el2_setup.h | 5 +++++
>  arch/arm64/kvm/sys_regs.c          | 9 ++++++++-
>  3 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
> index 7c7493cb571f..0450c6c32377 100644
> --- a/arch/arm64/include/asm/cputype.h
> +++ b/arch/arm64/include/asm/cputype.h
> @@ -262,7 +262,8 @@ is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
>   */
>  static inline u32 __attribute_const__ read_cpuid_id(void)
>  {
> -	return read_cpuid(MIDR_EL1);
> +	return (read_cpuid(MIDR_EL1) == 0x6D0FD490 ? 0x410FD490 :
> +			read_cpuid(MIDR_EL1));
>  }
>  
>  static inline u64 __attribute_const__ read_cpuid_mpidr(void)
> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
> index b7afaa026842..502a14e54a31 100644
> --- a/arch/arm64/include/asm/el2_setup.h
> +++ b/arch/arm64/include/asm/el2_setup.h
> @@ -138,6 +138,11 @@
>  .macro __init_el2_nvhe_idregs
>  	mrs	x0, midr_el1
>  	mrs	x1, mpidr_el1
> +	ldr	x2, =0x6D0FD490
> +	cmp	x0, x2
> +	bne	.Loverride_cobalt100_\@
> +	ldr	x0, =0x410FD490
> +.Loverride_cobalt100_\@:
>  	msr	vpidr_el2, x0
>  	msr	vmpidr_el2, x1
>  .endm
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 30253bd19917..8ea9c7fdabdb 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -3574,7 +3574,14 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
>  		return ((struct sys_reg_desc *)r)->val;			\
>  	}
>  
> -FUNCTION_INVARIANT(midr_el1)
> +static u64 get_midr_el1(struct kvm_vcpu *v, const struct sys_reg_desc *r)
> +{
> +	((struct sys_reg_desc *)r)->val = read_sysreg(midr_el1);
> +	if (((struct sys_reg_desc *)r)->val == 0x6D0FD490)
> +		((struct sys_reg_desc *)r)->val == 0x410FD490;

As pointed out to me by Joey, this line is really interesting, and
shows that you didn't really test this patch.

Thanks,

	M.
  
Easwar Hariharan Feb. 8, 2024, 7:16 p.m. UTC | #4
On 2/7/2024 1:50 AM, Marc Zyngier wrote:
> On Tue, 06 Feb 2024 19:58:16 +0000,
> Easwar Hariharan <eahariha@linux.microsoft.com> wrote:
>>
>> Several workload optimizations and errata depend on validating that the
>> optimization or errata are applicable to the particular CPU by checking
>> the MIDR_EL1 system register value. With the Microsoft implementer ID
>> for Azure Cobalt 100, the value doesn't match and ~20-25% performance
>> regression is seen in these workloads. Override the Azure Cobalt 100
>> value and replace it with the default ARM Neoverse N2 value that Azure
>> Cobalt 100 is based on.
> 
> Since you don't disclose *why* this particular value should have any
> impact on the behaviour of the kernel, the answer should be "Thanks,
> but no, thanks".
>

The optimizations mentioned in the commit message reside in userspace
and depend on the MIDR value exposed to userspace by the kernel. As
mentioned in my response to Anshuman, this patch was a proof of concept
to have userspace apply the Neoverse N2 optimizations to Azure Cobalt 100
as well.

> Whatever the reason is for doing so, you should make it plain what you
> are working around. Blindly overriding ID registers is not an option,
> and you should simply add your MIDR value to whatever errata list that
> actually matches your implementation.
>

Thank you, I will do that.

>>
>> Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
>> ---
>>  arch/arm64/include/asm/cputype.h   | 3 ++-
>>  arch/arm64/include/asm/el2_setup.h | 5 +++++
>>  arch/arm64/kvm/sys_regs.c          | 9 ++++++++-
>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
>> index 7c7493cb571f..0450c6c32377 100644
>> --- a/arch/arm64/include/asm/cputype.h
>> +++ b/arch/arm64/include/asm/cputype.h
>> @@ -262,7 +262,8 @@ is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
>>   */
>>  static inline u32 __attribute_const__ read_cpuid_id(void)
>>  {
>> -	return read_cpuid(MIDR_EL1);
>> +	return (read_cpuid(MIDR_EL1) == 0x6D0FD490 ? 0x410FD490 :
>> +			read_cpuid(MIDR_EL1));
>>  }
>>  
>>  static inline u64 __attribute_const__ read_cpuid_mpidr(void)
>> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
>> index b7afaa026842..502a14e54a31 100644
>> --- a/arch/arm64/include/asm/el2_setup.h
>> +++ b/arch/arm64/include/asm/el2_setup.h
>> @@ -138,6 +138,11 @@
>>  .macro __init_el2_nvhe_idregs
>>  	mrs	x0, midr_el1
>>  	mrs	x1, mpidr_el1
>> +	ldr	x2, =0x6D0FD490
>> +	cmp	x0, x2
>> +	bne	.Loverride_cobalt100_\@
>> +	ldr	x0, =0x410FD490
>> +.Loverride_cobalt100_\@:
>>  	msr	vpidr_el2, x0
>>  	msr	vmpidr_el2, x1
>>  .endm
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 30253bd19917..8ea9c7fdabdb 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -3574,7 +3574,14 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
>>  		return ((struct sys_reg_desc *)r)->val;			\
>>  	}
>>  
>> -FUNCTION_INVARIANT(midr_el1)
>> +static u64 get_midr_el1(struct kvm_vcpu *v, const struct sys_reg_desc *r)
>> +{
>> +	((struct sys_reg_desc *)r)->val = read_sysreg(midr_el1);
>> +	if (((struct sys_reg_desc *)r)->val == 0x6D0FD490)
>> +		((struct sys_reg_desc *)r)->val == 0x410FD490;
> 
> As pointed out to me by Joey, this line is really interesting, and
> shows that you didn't really test this patch.
>

That has clearly escaped my notice, but we did test the patch and
validate that the Neoverse N2 MIDR value showed up where we looked.
Being new to arch/arm64, it's very possible that I may have modified
this hunk without needing to.

> Thanks,
> 
> 	M.
>
  
Easwar Hariharan Feb. 8, 2024, 7:16 p.m. UTC | #5
On 2/7/2024 1:49 AM, Mark Rutland wrote:
> On Tue, Feb 06, 2024 at 07:58:16PM +0000, Easwar Hariharan wrote:
>> Several workload optimizations and errata depend on validating that the
>> optimization or errata are applicable to the particular CPU by checking
>> the MIDR_EL1 system register value. With the Microsoft implementer ID
>> for Azure Cobalt 100, the value doesn't match and ~20-25% performance
>> regression is seen in these workloads. Override the Azure Cobalt 100
>> value and replace it with the default ARM Neoverse N2 value that Azure
>> Cobalt 100 is based on.
> 
> NAK to rewriting the MIDR in the kernel; we do not lie to userspace about the
> MIDR, and this is not a can of worms we're going to open.
> 
> If you desire some microarchitectural performance optimizations in particular
> projects, please submit patches to those projects to understand your MIDR
> value.

Understood.

> 
> Further, if Azure Cobalt 100 is based on ARM Neoverse N2, you presumably suffer
> from the same errata; can you comment on that at all? e.g. are there any
> changes in this part that *might* lead to differences in errata and/or
> workarounds? How do the MIDR_EL1.{Variant,Revision} values compare to that of
> Neoverse N2?
>

Yes, Azure Cobalt 100 suffers from the same errata as Neoverse N2. We had changes
in the implementation, but according to our hardware folks, the Neoverse N2 errata
we are affected by so far aren't affected by the changes made for Azure Cobalt 100.

> Mark.
> 
>> Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
>> ---
>>  arch/arm64/include/asm/cputype.h   | 3 ++-
>>  arch/arm64/include/asm/el2_setup.h | 5 +++++
>>  arch/arm64/kvm/sys_regs.c          | 9 ++++++++-
>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
>> index 7c7493cb571f..0450c6c32377 100644
>> --- a/arch/arm64/include/asm/cputype.h
>> +++ b/arch/arm64/include/asm/cputype.h
>> @@ -262,7 +262,8 @@ is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
>>   */
>>  static inline u32 __attribute_const__ read_cpuid_id(void)
>>  {
>> -	return read_cpuid(MIDR_EL1);
>> +	return (read_cpuid(MIDR_EL1) == 0x6D0FD490 ? 0x410FD490 :
>> +			read_cpuid(MIDR_EL1));
>>  }
>>  
>>  static inline u64 __attribute_const__ read_cpuid_mpidr(void)
>> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
>> index b7afaa026842..502a14e54a31 100644
>> --- a/arch/arm64/include/asm/el2_setup.h
>> +++ b/arch/arm64/include/asm/el2_setup.h
>> @@ -138,6 +138,11 @@
>>  .macro __init_el2_nvhe_idregs
>>  	mrs	x0, midr_el1
>>  	mrs	x1, mpidr_el1
>> +	ldr	x2, =0x6D0FD490
>> +	cmp	x0, x2
>> +	bne	.Loverride_cobalt100_\@
>> +	ldr	x0, =0x410FD490
>> +.Loverride_cobalt100_\@:
>>  	msr	vpidr_el2, x0
>>  	msr	vmpidr_el2, x1
>>  .endm
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 30253bd19917..8ea9c7fdabdb 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -3574,7 +3574,14 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
>>  		return ((struct sys_reg_desc *)r)->val;			\
>>  	}
>>  
>> -FUNCTION_INVARIANT(midr_el1)
>> +static u64 get_midr_el1(struct kvm_vcpu *v, const struct sys_reg_desc *r)
>> +{
>> +	((struct sys_reg_desc *)r)->val = read_sysreg(midr_el1);
>> +	if (((struct sys_reg_desc *)r)->val == 0x6D0FD490)
>> +		((struct sys_reg_desc *)r)->val == 0x410FD490;
>> +	return ((struct sys_reg_desc *)r)->val;
>> +}
>> +
>>  FUNCTION_INVARIANT(revidr_el1)
>>  FUNCTION_INVARIANT(aidr_el1)
>>  
>> -- 
>> 2.34.1
>>
>>
  
Easwar Hariharan Feb. 8, 2024, 7:16 p.m. UTC | #6
On 2/6/2024 11:54 PM, Anshuman Khandual wrote:
> 
> On 2/7/24 01:28, Easwar Hariharan wrote:
>> Several workload optimizations and errata depend on validating that the
>> optimization or errata are applicable to the particular CPU by checking
>> the MIDR_EL1 system register value. With the Microsoft implementer ID
> 
> Which is how it should be done.
> 
>> for Azure Cobalt 100, the value doesn't match and ~20-25% performance
>> regression is seen in these workloads. Override the Azure Cobalt 100
>> value and replace it with the default ARM Neoverse N2 value that Azure
>> Cobalt 100 is based on.
> 
> Why cannot these MIDR values be classified as required and subscribed to
> the existing erratas that is affecting such implementations. Hence these
> work arounds will be triggered as and when applicable. Why then override
> MIDR value instead ?
>

Thanks for the feedback, I will go ahead and add the Azure Cobalt 100 MIDR
value to the range of MIDRs affected by the Neoverse N2 errata. This
patch was a proof of concept to have userspace apply the Neoverse N2
optimizations to Azure Cobalt 100 as well. As Mark mentioned in a sibling
response, this is not an acceptable way to accomplish this.

>>
>> Signed-off-by: Easwar Hariharan <eahariha@linux.microsoft.com>
>> ---
>>  arch/arm64/include/asm/cputype.h   | 3 ++-
>>  arch/arm64/include/asm/el2_setup.h | 5 +++++
>>  arch/arm64/kvm/sys_regs.c          | 9 ++++++++-
>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
>> index 7c7493cb571f..0450c6c32377 100644
>> --- a/arch/arm64/include/asm/cputype.h
>> +++ b/arch/arm64/include/asm/cputype.h
>> @@ -262,7 +262,8 @@ is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
>>   */
>>  static inline u32 __attribute_const__ read_cpuid_id(void)
>>  {
>> -	return read_cpuid(MIDR_EL1);
>> +	return (read_cpuid(MIDR_EL1) == 0x6D0FD490 ? 0x410FD490 :
>> +			read_cpuid(MIDR_EL1));
>>  }
>>  
>>  static inline u64 __attribute_const__ read_cpuid_mpidr(void)
>> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
>> index b7afaa026842..502a14e54a31 100644
>> --- a/arch/arm64/include/asm/el2_setup.h
>> +++ b/arch/arm64/include/asm/el2_setup.h
>> @@ -138,6 +138,11 @@
>>  .macro __init_el2_nvhe_idregs
>>  	mrs	x0, midr_el1
>>  	mrs	x1, mpidr_el1
>> +	ldr	x2, =0x6D0FD490
>> +	cmp	x0, x2
>> +	bne	.Loverride_cobalt100_\@
>> +	ldr	x0, =0x410FD490
>> +.Loverride_cobalt100_\@:
>>  	msr	vpidr_el2, x0
>>  	msr	vmpidr_el2, x1
>>  .endm
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 30253bd19917..8ea9c7fdabdb 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -3574,7 +3574,14 @@ id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
>>  		return ((struct sys_reg_desc *)r)->val;			\
>>  	}
>>  
>> -FUNCTION_INVARIANT(midr_el1)
>> +static u64 get_midr_el1(struct kvm_vcpu *v, const struct sys_reg_desc *r)
>> +{
>> +	((struct sys_reg_desc *)r)->val = read_sysreg(midr_el1);
>> +	if (((struct sys_reg_desc *)r)->val == 0x6D0FD490)
>> +		((struct sys_reg_desc *)r)->val == 0x410FD490;
>> +	return ((struct sys_reg_desc *)r)->val;
>> +}
>> +
>>  FUNCTION_INVARIANT(revidr_el1)
>>  FUNCTION_INVARIANT(aidr_el1)
>>
  
Mark Rutland Feb. 9, 2024, 11:33 a.m. UTC | #7
On Thu, Feb 08, 2024 at 11:16:10AM -0800, Easwar Hariharan wrote:
> On 2/7/2024 1:49 AM, Mark Rutland wrote:
> > On Tue, Feb 06, 2024 at 07:58:16PM +0000, Easwar Hariharan wrote:
> > Further, if Azure Cobalt 100 is based on ARM Neoverse N2, you presumably suffer
> > from the same errata; can you comment on that at all? e.g. are there any
> > changes in this part that *might* lead to differences in errata and/or
> > workarounds? How do the MIDR_EL1.{Variant,Revision} values compare to that of
> > Neoverse N2?
> 
> Yes, Azure Cobalt 100 suffers from the same errata as Neoverse N2. We had changes
> in the implementation, but according to our hardware folks, the Neoverse N2 errata
> we are affected by so far aren't affected by the changes made for Azure Cobalt 100.

Ok, so of the currently-known-and-mitigated errata, you'll be affected by:

	ARM64_ERRATUM_2139208
	ARM64_ERRATUM_2067961
	ARM64_ERRATUM_2253138

.. and we'll need to extend the midr_range lists for those errata to cover
Azure Cobalt 100.

From your patch, it looks like the Azure Cobalt 100 MIDR value (0x6D0FD490) is
the same as the Arm Neoverse-N2 r0p0 MIDR value (0x410FD490), except the
'Implementer' field is 0x6D ('m' in ASCII) rather than 0x41 ('A' in ASCII).

Are you happy to send a patch extending arch/arm64/include/asm/cputype.h with
the relevant ARM_CPU_IMP_* and CPU_PART_* definitions, and use those to extend
the midr_range lists for those errata?

As above, if you could make any comment on how the MIDR_EL1.{Variant,Revision}
fields map to that of Arm Neoverse-N2, it would be very helpful. It's not clear
to me whether those fields correspond directly (and so this part is based on
r0p0), or whether you have a different scheme for revision numbers. That'll
matter for correctly matching any future errata and/or future revisions of
Azure Cobalt 100.

Mark.
  
Easwar Hariharan Feb. 9, 2024, 6:58 p.m. UTC | #8
On 2/9/2024 3:33 AM, Mark Rutland wrote:
> On Thu, Feb 08, 2024 at 11:16:10AM -0800, Easwar Hariharan wrote:
>> On 2/7/2024 1:49 AM, Mark Rutland wrote:
>>> On Tue, Feb 06, 2024 at 07:58:16PM +0000, Easwar Hariharan wrote:
>>> Further, if Azure Cobalt 100 is based on ARM Neoverse N2, you presumably suffer
>>> from the same errata; can you comment on that at all? e.g. are there any
>>> changes in this part that *might* lead to differences in errata and/or
>>> workarounds? How do the MIDR_EL1.{Variant,Revision} values compare to that of
>>> Neoverse N2?
>>
>> Yes, Azure Cobalt 100 suffers from the same errata as Neoverse N2. We had changes
>> in the implementation, but according to our hardware folks, the Neoverse N2 errata
>> we are affected by so far aren't affected by the changes made for Azure Cobalt 100.
> 
> Ok, so of the currently-known-and-mitigated errata, you'll be affected by:
> 
> 	ARM64_ERRATUM_2139208
> 	ARM64_ERRATUM_2067961
> 	ARM64_ERRATUM_2253138
> 
> ... and we'll need to extend the midr_range lists for those errata to cover
> Azure Cobalt 100.
> 
>>From your patch, it looks like the Azure Cobalt 100 MIDR value (0x6D0FD490) is
> the same as the Arm Neoverse-N2 r0p0 MIDR value (0x410FD490), except the
> 'Implementer' field is 0x6D ('m' in ASCII) rather than 0x41 ('A' in ASCII).
> 
> Are you happy to send a patch extending arch/arm64/include/asm/cputype.h with
> the relevant ARM_CPU_IMP_* and CPU_PART_* definitions, and use those to extend
> the midr_range lists for those errata?

Yes.

> 
> As above, if you could make any comment on how the MIDR_EL1.{Variant,Revision}
> fields map to that of Arm Neoverse-N2, it would be very helpful. It's not clear
> to me whether those fields correspond directly (and so this part is based on
> r0p0), or whether you have a different scheme for revision numbers. That'll
> matter for correctly matching any future errata and/or future revisions of
> Azure Cobalt 100.
> 

Thanks for the clarifying detail on your question. Azure Cobalt 100 is indeed based
on r0p0 of the Neoverse N-2 and we have not used a different scheme than Neoverse N2
for the Variant and Revision fields.


> Mark.
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
  

Patch

diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 7c7493cb571f..0450c6c32377 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -262,7 +262,8 @@  is_midr_in_range_list(u32 midr, struct midr_range const *ranges)
  */
 static inline u32 __attribute_const__ read_cpuid_id(void)
 {
-	return read_cpuid(MIDR_EL1);
+	return (read_cpuid(MIDR_EL1) == 0x6D0FD490 ? 0x410FD490 :
+			read_cpuid(MIDR_EL1));
 }
 
 static inline u64 __attribute_const__ read_cpuid_mpidr(void)
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index b7afaa026842..502a14e54a31 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -138,6 +138,11 @@ 
 .macro __init_el2_nvhe_idregs
 	mrs	x0, midr_el1
 	mrs	x1, mpidr_el1
+	ldr	x2, =0x6D0FD490
+	cmp	x0, x2
+	bne	.Loverride_cobalt100_\@
+	ldr	x0, =0x410FD490
+.Loverride_cobalt100_\@:
 	msr	vpidr_el2, x0
 	msr	vmpidr_el2, x1
 .endm
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 30253bd19917..8ea9c7fdabdb 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -3574,7 +3574,14 @@  id_to_sys_reg_desc(struct kvm_vcpu *vcpu, u64 id,
 		return ((struct sys_reg_desc *)r)->val;			\
 	}
 
-FUNCTION_INVARIANT(midr_el1)
+static u64 get_midr_el1(struct kvm_vcpu *v, const struct sys_reg_desc *r)
+{
+	((struct sys_reg_desc *)r)->val = read_sysreg(midr_el1);
+	if (((struct sys_reg_desc *)r)->val == 0x6D0FD490)
+		((struct sys_reg_desc *)r)->val == 0x410FD490;
+	return ((struct sys_reg_desc *)r)->val;
+}
+
 FUNCTION_INVARIANT(revidr_el1)
 FUNCTION_INVARIANT(aidr_el1)