[3/4] KVM: arm64: selftests: Align VA space allocator with TTBR0

Message ID 20221207214809.489070-4-oliver.upton@linux.dev
State New
Headers
Series [1/4] KVM: selftests: Fix build due to ucall_uninit() removal |

Commit Message

Oliver Upton Dec. 7, 2022, 9:48 p.m. UTC
  An interesting feature of the Arm architecture is that the stage-1 MMU
supports two distinct VA regions, controlled by TTBR{0,1}_EL1. As KVM
selftests on arm64 only uses TTBR0_EL1, the VA space is constrained to
[0, 2^(va_bits-1)). This is different from other architectures that
allow for addressing low and high regions of the VA space from a single
page table.

KVM selftests' VA space allocator presumes the valid address range is
split between low and high memory based on the MSB, which of course is a
poor match for arm64's TTBR0 region.

Allow architectures to override the default VA space layout. Make use of
the override to align vpages_valid with the behavior of TTBR0 on arm64.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 .../testing/selftests/kvm/include/kvm_util_base.h |  1 +
 .../testing/selftests/kvm/lib/aarch64/processor.c | 10 ++++++++++
 tools/testing/selftests/kvm/lib/kvm_util.c        | 15 ++++++++++-----
 3 files changed, 21 insertions(+), 5 deletions(-)
  

Comments

Sean Christopherson Dec. 8, 2022, 12:18 a.m. UTC | #1
On Wed, Dec 07, 2022, Oliver Upton wrote:
> diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
> index 316de70db91d..5972a23b2765 100644
> --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
> +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
> @@ -541,3 +541,13 @@ void kvm_selftest_arch_init(void)
>  	 */
>  	guest_modes_append_default();
>  }
> +
> +void vm_vaddr_populate_bitmap(struct kvm_vm *vm)

Add "arch" so that it's obvious this can be overridden?  The "__weak" conveys that
for the implementation, but not for the call site.  E.g. vm_arch_vaddr_populate_bitmap().

Actually, IIUC, the issue is that the high half isn't mapped (probably the wrong
terminology).  I.e. the calculation for the low half stays the same, and the high
half just goes away.

> +{
> +	/*
> +	 * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
> +	 * is [0, 2^(64 - TCR_EL1.T0SZ)).
> +	 */
> +	sparsebit_set_num(vm->vpages_valid, 0,
> +			  (1ULL << vm->va_bits) >> vm->page_shift);
> +}
> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
> index e9607eb089be..c88c3ace16d2 100644
> --- a/tools/testing/selftests/kvm/lib/kvm_util.c
> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c
> @@ -186,6 +186,15 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
>  _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
>  	       "Missing new mode params?");
>  
> +__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
> +{
> +	sparsebit_set_num(vm->vpages_valid,
> +		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
> +	sparsebit_set_num(vm->vpages_valid,
> +		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
> +		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);

Any objection to fixing up the formatting?  Actually, we can do more than just
fix the indentation, e.g. the number of bits is identical, and documenting that
this does a high/low split would be helpful.

Together, what about?  The #ifdef is a bit gross, especially around "hi_start",
but it's less duplicate code.  And IMO, having things bundled in the same place
makes it a lot easier for newbies (to arm64 or kernel coding in general) to
understand what's going on and why arm64 is different.

---
 tools/testing/selftests/kvm/lib/kvm_util.c | 23 +++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index e9607eb089be..d6f2c17e3d40 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -186,6 +186,23 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
 	       "Missing new mode params?");
 
+static void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+	/*
+	 * All architectures supports splitting the virtual address space into
+	 * a high and a low half.  Populate both halves, except for arm64 which
+	 * currently uses only TTBR0_EL1 (arbitrary selftests "logic"), i.e.
+	 * only has a valid low half.
+	 */
+	sparsebit_num_t nr_va_bits = (1ULL << (vm->va_bits - 1)) >> vm->page_shift;
+#ifndef __aarch64__
+	sparsebit_num_t hi_start = (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift;
+
+	sparsebit_set_num(vm->vpages_valid, hi_start, nr_va_bits);
+#endif
+	sparsebit_set_num(vm->vpages_valid, 0, nr_va_bits);
+}
+
 struct kvm_vm *____vm_create(enum vm_guest_mode mode)
 {
 	struct kvm_vm *vm;
@@ -274,11 +291,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode)
 
 	/* Limit to VA-bit canonical virtual addresses. */
 	vm->vpages_valid = sparsebit_alloc();
-	sparsebit_set_num(vm->vpages_valid,
-		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
-	sparsebit_set_num(vm->vpages_valid,
-		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
-		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	vm_vaddr_populate_bitmap(vm);
 
 	/* Limit physical addresses to PA-bits. */
 	vm->max_gfn = vm_compute_max_gfn(vm);

base-commit: 35aecc3289eebf193fd70a067ea448ae2f0bb9b9
--
  
Oliver Upton Dec. 8, 2022, 12:27 a.m. UTC | #2
On Thu, Dec 08, 2022 at 12:18:07AM +0000, Sean Christopherson wrote:

[...]

> Together, what about?  The #ifdef is a bit gross, especially around "hi_start",
> but it's less duplicate code.  And IMO, having things bundled in the same place
> makes it a lot easier for newbies (to arm64 or kernel coding in general) to
> understand what's going on and why arm64 is different.

I'd rather we not go this route. We really shouldn't make any attempt to
de-dupe something that is inherently architecture specific.

For example:

> +	/*
> +	 * All architectures supports splitting the virtual address space into
> +	 * a high and a low half.  Populate both halves, except for arm64 which
> +	 * currently uses only TTBR0_EL1 (arbitrary selftests "logic"), i.e.
> +	 * only has a valid low half.
> +	 */
> +	sparsebit_num_t nr_va_bits = (1ULL << (vm->va_bits - 1)) >> vm->page_shift;

This is still wrong for arm64. When we say the VA space is 48 bits, we
really do mean that TTBR0 is able to address a full 48 bits. So this
truncates the MSB for the addressing mode.

With the code living in the arm64 side of the shop, I can also tailor
the comment to directly match the architecture to provide breadcrumbs
tying it back to the Arm ARM.

--
Thanks,
Oliver
  
Sean Christopherson Dec. 8, 2022, 1:09 a.m. UTC | #3
On Thu, Dec 08, 2022, Oliver Upton wrote:
> On Thu, Dec 08, 2022 at 12:18:07AM +0000, Sean Christopherson wrote:
> 
> [...]
> 
> > Together, what about?  The #ifdef is a bit gross, especially around "hi_start",
> > but it's less duplicate code.  And IMO, having things bundled in the same place
> > makes it a lot easier for newbies (to arm64 or kernel coding in general) to
> > understand what's going on and why arm64 is different.
> 
> I'd rather we not go this route. We really shouldn't make any attempt to
> de-dupe something that is inherently architecture specific.
> 
> For example:
> 
> > +	/*
> > +	 * All architectures supports splitting the virtual address space into
> > +	 * a high and a low half.  Populate both halves, except for arm64 which
> > +	 * currently uses only TTBR0_EL1 (arbitrary selftests "logic"), i.e.
> > +	 * only has a valid low half.
> > +	 */
> > +	sparsebit_num_t nr_va_bits = (1ULL << (vm->va_bits - 1)) >> vm->page_shift;
> 
> This is still wrong for arm64. When we say the VA space is 48 bits, we
> really do mean that TTBR0 is able to address a full 48 bits. So this
> truncates the MSB for the addressing mode.

Ah, I missed the lack of a "-1" in the arm64 code.

> With the code living in the arm64 side of the shop, I can also tailor
> the comment to directly match the architecture to provide breadcrumbs
> tying it back to the Arm ARM.

The main reason why I don't like splitting the code this way is that it makes it
harder for non-arm64 folks to understand what makes arm64 different.  Case in
point, my overlooking of the "-1".  I read the changelog and the comment and
still missed that small-but-important detail, largely because I am completely
unfamiliar with how TTBR{0,1}_EL1 works.

Actually, before we do anything, we should get confirmation from the s390 and
RISC-V folks on whether they have a canonical hole like x86, i.e. maybe x86 is
the oddball.

Anyways, assuming one architecture is the oddball (I'm betting it's x86), I have
no objection to bleeding some of the details into the common code, including a
large comment to document the gory details.  If every architecture manages to be
different, then yeah, a hook is probably warranted.

That said, I also don't mind shoving a bit of abstraction into arch code if that
avoids some #ifdef ugliness or allows for better documentation, flexibility, etc.
What I don't like is duplicating the logic of turning "VA bits" into the bitmap.

E.g. something like this would also be an option.  Readers would obviously need
to track down has_split_va_space, but that should be fairly easy and can come
with a big arch-specific comment, and meanwhile the core logic of how selftests
populate the va bitmaps is common.

Or if arm64 is the only arch without a split, invert the flag and have arm64 set
the vm->has_combined_va_space or whatever.

/*
 * Populate vm->vpages_valid with the guest virtual pages that are valid for
 * this VM, derived from vm->va_bits and vm->page_shift.
 *
 * If vm->has_split_va_space is set, the VA space is treated as two equally
 * sized halves (one starting at page 0, one ending at the top of the 64-bit
 * address space) and both are marked valid.  Otherwise a single region
 * starting at page 0 and spanning the full va_bits is marked valid.
 */
static void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
{
	unsigned int eff_va_bits = vm->va_bits;
	sparsebit_num_t nr_bits;

	/*
	 * With a split VA space each half only spans va_bits - 1 bits, so
	 * drop one bit before sizing the region(s).
	 */
	if (vm->has_split_va_space)
		eff_va_bits--;

	/* Number of pages covered by one region of eff_va_bits bits. */
	nr_bits = (1ULL << eff_va_bits) >> vm->page_shift;

	/* Low region: always valid, starts at page 0. */
	sparsebit_set_num(vm->vpages_valid, 0, nr_bits);

	/*
	 * High region: starts at the first address whose bits at and above
	 * eff_va_bits are all set, converted to a page index.
	 */
	if (vm->has_split_va_space)
		sparsebit_set_num(vm->vpages_valid,
				  (~((1ULL << eff_va_bits) - 1)) >> vm->page_shift,
				  nr_bits);
}
  
Andrew Jones Dec. 8, 2022, 4:23 p.m. UTC | #4
On Thu, Dec 08, 2022 at 01:09:38AM +0000, Sean Christopherson wrote:
...
> Actually, before we do anything, we should get confirmation from the s390 and
> RISC-V folks on whether they have a canonical hole like x86, i.e. maybe x86 is
> the oddball.

riscv splits like x86.

Thanks,
drew
  

Patch

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 6cd86da698b3..fbc2a79369b8 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -420,6 +420,7 @@  void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm);
 vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
 vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 316de70db91d..5972a23b2765 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -541,3 +541,13 @@  void kvm_selftest_arch_init(void)
 	 */
 	guest_modes_append_default();
 }
+
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+	/*
+	 * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
+	 * is [0, 2^(64 - TCR_EL1.T0SZ)).
+	 */
+	sparsebit_set_num(vm->vpages_valid, 0,
+			  (1ULL << vm->va_bits) >> vm->page_shift);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index e9607eb089be..c88c3ace16d2 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -186,6 +186,15 @@  const struct vm_guest_mode_params vm_guest_mode_params[] = {
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
 	       "Missing new mode params?");
 
+__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+	sparsebit_set_num(vm->vpages_valid,
+		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	sparsebit_set_num(vm->vpages_valid,
+		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+}
+
 struct kvm_vm *____vm_create(enum vm_guest_mode mode)
 {
 	struct kvm_vm *vm;
@@ -274,11 +283,7 @@  struct kvm_vm *____vm_create(enum vm_guest_mode mode)
 
 	/* Limit to VA-bit canonical virtual addresses. */
 	vm->vpages_valid = sparsebit_alloc();
-	sparsebit_set_num(vm->vpages_valid,
-		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
-	sparsebit_set_num(vm->vpages_valid,
-		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
-		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	vm_vaddr_populate_bitmap(vm);
 
 	/* Limit physical addresses to PA-bits. */
 	vm->max_gfn = vm_compute_max_gfn(vm);