[2/4] KVM: selftests: Setup ucall after loading program into guest memory

Message ID 20221207214809.489070-3-oliver.upton@linux.dev
State New
Headers
Series [1/4] KVM: selftests: Fix build due to ucall_uninit() removal |

Commit Message

Oliver Upton Dec. 7, 2022, 9:48 p.m. UTC
  The new ucall infrastructure needs to update a couple of guest globals
to pass through the ucall MMIO addr and pool of ucall structs. A
precondition of this actually working is to have the program image
already loaded into guest memory.

Call ucall_init() after kvm_vm_elf_load(). Continue to park the ucall
MMIO addr after MEM_REGION_TEST_DATA.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 tools/testing/selftests/kvm/aarch64/page_fault_test.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
  

Comments

Sean Christopherson Dec. 7, 2022, 11:57 p.m. UTC | #1
On Wed, Dec 07, 2022, Oliver Upton wrote:
> The new ucall infrastructure needs to update a couple of guest globals
> to pass through the ucall MMIO addr and pool of ucall structs. A
> precondition of this actually working is to have the program image
> already loaded into guest memory.

Ouch.  Might be worth explicitly stating what goes wrong.  Even though it's super
obvious in hindsight, it still took me a few seconds to understand what
precondition you were referring to, e.g. I was trying to figure out how selecting
the MMIO address depended on the guest code being loaded...

> 
> Call ucall_init() after kvm_vm_elf_load(). Continue to park the ucall
> MMIO addr after MEM_REGION_TEST_DATA.
> 
> Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> ---
>  tools/testing/selftests/kvm/aarch64/page_fault_test.c | 8 +++++++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> index 92d3a91153b6..95d22cfb7b41 100644
> --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> @@ -609,8 +609,13 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
>  				    data_size / guest_page_size,
>  				    p->test_desc->data_memslot_flags);
>  	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
> +}
> +
> +static void setup_ucall(struct kvm_vm *vm)
> +{
> +	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
>  
> -	ucall_init(vm, data_gpa + data_size);
> +	ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);

Isn't there a hole after CODE_AND_DATA_MEMSLOT?  I.e. after memslot 0?  The reason
I ask is because if so, then we can do the temporarily heinous, but hopefully forward
looking thing of adding a helper to wrap kvm_vm_elf_load() + ucall_init().

E.g. I think we can do this immediately, and then at some point in the 6.2 cycle
add a dedicated region+memslot for the ucall MMIO page.

---
 .../selftests/kvm/aarch64/page_fault_test.c   | 10 +------
 .../selftests/kvm/include/kvm_util_base.h     |  1 +
 tools/testing/selftests/kvm/lib/kvm_util.c    | 28 +++++++++++--------
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 95d22cfb7b41..68c47db2eb2e 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -611,13 +611,6 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
 	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
 }
 
-static void setup_ucall(struct kvm_vm *vm)
-{
-	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
-
-	ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
-}
-
 static void setup_default_handlers(struct test_desc *test)
 {
 	if (!test->mmio_handler)
@@ -706,8 +699,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	vm = ____vm_create(mode);
 	setup_memslots(vm, p);
-	kvm_vm_elf_load(vm, program_invocation_name);
-	setup_ucall(vm);
+	vm_init_guest_code_and_data(vm);
 	vcpu = vm_vcpu_add(vm, 0, guest_code);
 
 	setup_gva_maps(vm);
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index fbc2a79369b8..175b5ca0c061 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -682,6 +682,7 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
 			      vm_paddr_t paddr_min, uint32_t memslot);
 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+void vm_init_guest_code_and_data(struct kvm_vm *vm);
 
 /*
  * ____vm_create() does KVM_CREATE_VM and little else.  __vm_create() also
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index c88c3ace16d2..0eab6b11a6e9 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -329,12 +329,27 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
 	return vm_adjust_num_guest_pages(mode, nr_pages);
 }
 
+void vm_init_guest_code_and_data(struct kvm_vm *vm)
+{
+	struct userspace_mem_region *slot;
+
+	kvm_vm_elf_load(vm, program_invocation_name);
+
+	/*
+	 * TODO: Add a dedicated memory region to carve out MMIO.  KVM treats
+	 * writes to read-only memslots as MMIO, and creating a read-only
+	 * memslot for the MMIO region would prevent silently clobbering the
+	 * MMIO region.
+	 */
+	slot = vm_get_mem_region(vm, MEM_REGION_DATA);
+	ucall_init(vm, slot->region.guest_phys_addr + slot->region.memory_size);
+}
+
 struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
 			   uint64_t nr_extra_pages)
 {
 	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
 						 nr_extra_pages);
-	struct userspace_mem_region *slot0;
 	struct kvm_vm *vm;
 	int i;
 
@@ -347,16 +362,7 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
 	for (i = 0; i < NR_MEM_REGIONS; i++)
 		vm->memslots[i] = 0;
 
-	kvm_vm_elf_load(vm, program_invocation_name);
-
-	/*
-	 * TODO: Add proper defines to protect the library's memslots, and then
-	 * carve out memslot1 for the ucall MMIO address.  KVM treats writes to
-	 * read-only memslots as MMIO, and creating a read-only memslot for the
-	 * MMIO region would prevent silently clobbering the MMIO region.
-	 */
-	slot0 = memslot2region(vm, 0);
-	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+	vm_init_guest_code_and_data(vm);
 
 	kvm_arch_vm_post_create(vm);
 

base-commit: 1cf369f929d607760bf721f3eb9e965ed9c703e3
--
  
Oliver Upton Dec. 8, 2022, 12:17 a.m. UTC | #2
On Wed, Dec 07, 2022 at 11:57:27PM +0000, Sean Christopherson wrote:
> On Wed, Dec 07, 2022, Oliver Upton wrote:
> > The new ucall infrastructure needs to update a couple of guest globals
> > to pass through the ucall MMIO addr and pool of ucall structs. A
> > precondition of this actually working is to have the program image
> > already loaded into guest memory.
> 
> Ouch.  Might be worth explicitly stating what goes wrong.  Even though it's super
> obvious in hindsight, it still took me a few seconds to understand what
> precondition you were referring to, e.g. I was trying to figure out how selecting
> the MMIO address depended on the guest code being loaded...
> 
> > 
> > Call ucall_init() after kvm_vm_elf_load(). Continue to park the ucall
> > MMIO addr after MEM_REGION_TEST_DATA.
> > 
> > Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
> > ---
> >  tools/testing/selftests/kvm/aarch64/page_fault_test.c | 8 +++++++-
> >  1 file changed, 7 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > index 92d3a91153b6..95d22cfb7b41 100644
> > --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > @@ -609,8 +609,13 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
> >  				    data_size / guest_page_size,
> >  				    p->test_desc->data_memslot_flags);
> >  	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
> > +}
> > +
> > +static void setup_ucall(struct kvm_vm *vm)
> > +{
> > +	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
> >  
> > -	ucall_init(vm, data_gpa + data_size);
> > +	ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
> 
> Isn't there a hole after CODE_AND_DATA_MEMSLOT?  I.e. after memslot 0?

Sure, but that's only guaranteed in the PA space.

> The reason
> I ask is because if so, then we can do the temporarily heinous, but hopefully forward
> looking thing of adding a helper to wrap kvm_vm_elf_load() + ucall_init().
> 
> E.g. I think we can do this immediately, and then at some point in the 6.2 cycle
> add a dedicated region+memslot for the ucall MMIO page.

Even still, that's just a kludge to make ucalls work. We have other
MMIO devices (GIC distributor, for example) that work by chance since
nothing conflicts with the constant GPAs we've selected in the tests.

I'd rather we go down the route of having an address allocator for
both the VA and PA spaces to provide carveouts at runtime. There's
another issue with the new ucall implementation where identity mapping
could stomp on a program segment that I'm fighting with right now which
only further highlights the problems with our (mis)management of address
spaces in selftests.

--
Thanks,
Oliver
  
Sean Christopherson Dec. 8, 2022, 12:24 a.m. UTC | #3
On Thu, Dec 08, 2022, Oliver Upton wrote:
> On Wed, Dec 07, 2022 at 11:57:27PM +0000, Sean Christopherson wrote:
> > > diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > index 92d3a91153b6..95d22cfb7b41 100644
> > > --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > @@ -609,8 +609,13 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
> > >  				    data_size / guest_page_size,
> > >  				    p->test_desc->data_memslot_flags);
> > >  	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
> > > +}
> > > +
> > > +static void setup_ucall(struct kvm_vm *vm)
> > > +{
> > > +	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
> > >  
> > > -	ucall_init(vm, data_gpa + data_size);
> > > +	ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
> > 
> > Isn't there a hole after CODE_AND_DATA_MEMSLOT?  I.e. after memslot 0?
> 
> Sure, but that's only guaranteed in the PA space.
> 
> > The reason
> > I ask is because if so, then we can do the temporarily heinous, but hopefully forward
> > looking thing of adding a helper to wrap kvm_vm_elf_load() + ucall_init().
> > 
> > E.g. I think we can do this immediately, and then at some point in the 6.2 cycle
> > add a dedicated region+memslot for the ucall MMIO page.
> 
> Even still, that's just a kludge to make ucalls work. We have other
> MMIO devices (GIC distributor, for example) that work by chance since
> nothing conflicts with the constant GPAs we've selected in the tests.
> 
> I'd rather we go down the route of having an address allocator for the
> for both the VA and PA spaces to provide carveouts at runtime.

Aren't those two separate issues?  The PA, a.k.a. memslots space, can be solved
by allocating a dedicated memslot, i.e. doesn't need a carveout.  At worst, collisions
will yield very explicit asserts, which IMO is better than whatever might go wrong
with a carveout.

> There's another issue with the new ucall implementation where identity
> mapping could stomp on a program segment that I'm fighting with right now
> which only further highlights the problems with our (mis)management of
> address spaces in selftests.

Oooh, this crud:

 	virt_pg_map(vm, mmio_gpa, mmio_gpa);

Yeah, that needs to be fixed.  But again, that's a separate issue, e.g. selftests
can allocate a virtual address and map the read-only memslot.
  
Oliver Upton Dec. 8, 2022, 12:37 a.m. UTC | #4
On Thu, Dec 08, 2022 at 12:24:20AM +0000, Sean Christopherson wrote:
> On Thu, Dec 08, 2022, Oliver Upton wrote:
> > On Wed, Dec 07, 2022 at 11:57:27PM +0000, Sean Christopherson wrote:
> > > > diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > > index 92d3a91153b6..95d22cfb7b41 100644
> > > > --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > > +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > > @@ -609,8 +609,13 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
> > > >  				    data_size / guest_page_size,
> > > >  				    p->test_desc->data_memslot_flags);
> > > >  	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
> > > > +}
> > > > +
> > > > +static void setup_ucall(struct kvm_vm *vm)
> > > > +{
> > > > +	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
> > > >  
> > > > -	ucall_init(vm, data_gpa + data_size);
> > > > +	ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
> > > 
> > > Isn't there a hole after CODE_AND_DATA_MEMSLOT?  I.e. after memslot 0?
> > 
> > Sure, but that's only guaranteed in the PA space.
> > 
> > > The reason
> > > I ask is because if so, then we can do the temporarily heinous, but hopefully forward
> > > looking thing of adding a helper to wrap kvm_vm_elf_load() + ucall_init().
> > > 
> > > E.g. I think we can do this immediately, and then at some point in the 6.2 cycle
> > > add a dedicated region+memslot for the ucall MMIO page.
> > 
> > Even still, that's just a kludge to make ucalls work. We have other
> > MMIO devices (GIC distributor, for example) that work by chance since
> > nothing conflicts with the constant GPAs we've selected in the tests.
> > 
> > I'd rather we go down the route of having an address allocator for the
> > for both the VA and PA spaces to provide carveouts at runtime.
> 
> Aren't those two separate issues?  The PA, a.k.a. memslots space, can be solved
> by allocating a dedicated memslot, i.e. doesn't need a carve.  At worst, collisions
> will yield very explicit asserts, which IMO is better than whatever might go wrong
> with a carve out.

Perhaps the use of the term 'carveout' wasn't right here.

What I'm suggesting is we cannot rely on KVM memslots alone to act as an
allocator for the PA space. KVM can provide devices to the guest that
aren't represented as memslots. If we're trying to fix PA allocations
anyway, why not make it generic enough to suit the needs of things
beyond ucalls?

--
Thanks,
Oliver
  
Ricardo Koller Dec. 8, 2022, 6:47 p.m. UTC | #5
On Thu, Dec 08, 2022 at 12:37:23AM +0000, Oliver Upton wrote:
> On Thu, Dec 08, 2022 at 12:24:20AM +0000, Sean Christopherson wrote:
> > On Thu, Dec 08, 2022, Oliver Upton wrote:
> > > On Wed, Dec 07, 2022 at 11:57:27PM +0000, Sean Christopherson wrote:
> > > > > diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > > > index 92d3a91153b6..95d22cfb7b41 100644
> > > > > --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > > > +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
> > > > > @@ -609,8 +609,13 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
> > > > >  				    data_size / guest_page_size,
> > > > >  				    p->test_desc->data_memslot_flags);
> > > > >  	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
> > > > > +}
> > > > > +
> > > > > +static void setup_ucall(struct kvm_vm *vm)
> > > > > +{
> > > > > +	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
> > > > >  
> > > > > -	ucall_init(vm, data_gpa + data_size);
> > > > > +	ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
> > > > 
> > > > Isn't there a hole after CODE_AND_DATA_MEMSLOT?  I.e. after memslot 0?
> > > 
> > > Sure, but that's only guaranteed in the PA space.
> > > 
> > > > The reason
> > > > I ask is because if so, then we can do the temporarily heinous, but hopefully forward
> > > > looking thing of adding a helper to wrap kvm_vm_elf_load() + ucall_init().
> > > > 
> > > > E.g. I think we can do this immediately, and then at some point in the 6.2 cycle
> > > > add a dedicated region+memslot for the ucall MMIO page.
> > > 
> > > Even still, that's just a kludge to make ucalls work. We have other
> > > MMIO devices (GIC distributor, for example) that work by chance since
> > > nothing conflicts with the constant GPAs we've selected in the tests.
> > > 
> > > I'd rather we go down the route of having an address allocator for the
> > > for both the VA and PA spaces to provide carveouts at runtime.
> > 
> > Aren't those two separate issues?  The PA, a.k.a. memslots space, can be solved
> > by allocating a dedicated memslot, i.e. doesn't need a carve.  At worst, collisions
> > will yield very explicit asserts, which IMO is better than whatever might go wrong
> > with a carve out.
> 
> Perhaps the use of the term 'carveout' wasn't right here.
> 
> What I'm suggesting is we cannot rely on KVM memslots alone to act as an
> allocator for the PA space. KVM can provide devices to the guest that
> aren't represented as memslots. If we're trying to fix PA allocations
> anyway, why not make it generic enough to suit the needs of things
> beyond ucalls?

One extra bit of information: in arm, IO is any access to an address (within
bounds) not backed by a memslot. Not the same as x86 where MMIO are writes to
read-only memslots.  No idea what other arches do.

> 
> --
> Thanks,
> Oliver

I think that we should use these proposed changes, and then move to an ideal
solution.  These are the changes I propose:

1. add an arch specific API for allocating MMIO physical ranges:
vm_arch_mmio_region_add(vm, npages).  The x86 version creates a read-only
memslot, and the arm one allocates physical space without a memslot in it.

2. Then change all IO related users (including ucall) to use
vm_arch_mmio_region_add(). Ex:

	pa = vm_arch_mmio_region_add(vm, npages);
	ucall_init(vm, pa);

page_fault_test needs to be adapted to use vm_arch_mmio_region_add() as well.

Thanks,
Ricardo
  
Sean Christopherson Dec. 8, 2022, 7:01 p.m. UTC | #6
On Thu, Dec 08, 2022, Ricardo Koller wrote:
> On Thu, Dec 08, 2022 at 12:37:23AM +0000, Oliver Upton wrote:
> > On Thu, Dec 08, 2022 at 12:24:20AM +0000, Sean Christopherson wrote:
> > > > Even still, that's just a kludge to make ucalls work. We have other
> > > > MMIO devices (GIC distributor, for example) that work by chance since
> > > > nothing conflicts with the constant GPAs we've selected in the tests.
> > > > 
> > > > I'd rather we go down the route of having an address allocator for the
> > > > for both the VA and PA spaces to provide carveouts at runtime.
> > > 
> > > Aren't those two separate issues?  The PA, a.k.a. memslots space, can be solved
> > > by allocating a dedicated memslot, i.e. doesn't need a carve.  At worst, collisions
> > > will yield very explicit asserts, which IMO is better than whatever might go wrong
> > > with a carve out.
> > 
> > Perhaps the use of the term 'carveout' wasn't right here.
> > 
> > What I'm suggesting is we cannot rely on KVM memslots alone to act as an
> > allocator for the PA space. KVM can provide devices to the guest that
> > aren't represented as memslots. If we're trying to fix PA allocations
> > anyway, why not make it generic enough to suit the needs of things
> > beyond ucalls?
> 
> One extra bit of information: in arm, IO is any access to an address (within
> bounds) not backed by a memslot. Not the same as x86 where MMIO are writes to
> read-only memslots.  No idea what other arches do.

I don't think that's correct; doesn't this code turn a write abort on a RO memslot
into an io_mem_abort()?  Specifically, the "(write_fault && !writable)" check will
match, and assuming none of the edge cases in the if-statement fire, KVM will
send the write down io_mem_abort().

	gfn = fault_ipa >> PAGE_SHIFT;
	memslot = gfn_to_memslot(vcpu->kvm, gfn);
	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
	write_fault = kvm_is_write_fault(vcpu);
	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
		/*
		 * The guest has put either its instructions or its page-tables
		 * somewhere it shouldn't have. Userspace won't be able to do
		 * anything about this (there's no syndrome for a start), so
		 * re-inject the abort back into the guest.
		 */
		if (is_iabt) {
			ret = -ENOEXEC;
			goto out;
		}

		if (kvm_vcpu_abt_iss1tw(vcpu)) {
			kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
			ret = 1;
			goto out_unlock;
		}

		/*
		 * Check for a cache maintenance operation. Since we
		 * ended-up here, we know it is outside of any memory
		 * slot. But we can't find out if that is for a device,
		 * or if the guest is just being stupid. The only thing
		 * we know for sure is that this range cannot be cached.
		 *
		 * So let's assume that the guest is just being
		 * cautious, and skip the instruction.
		 */
		if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
			kvm_incr_pc(vcpu);
			ret = 1;
			goto out_unlock;
		}

		/*
		 * The IPA is reported as [MAX:12], so we need to
		 * complement it with the bottom 12 bits from the
		 * faulting VA. This is always 12 bits, irrespective
		 * of the page size.
		 */
		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
		ret = io_mem_abort(vcpu, fault_ipa);
		goto out_unlock;
	}
  
Ricardo Koller Dec. 8, 2022, 7:49 p.m. UTC | #7
On Thu, Dec 08, 2022 at 07:01:57PM +0000, Sean Christopherson wrote:
> On Thu, Dec 08, 2022, Ricardo Koller wrote:
> > On Thu, Dec 08, 2022 at 12:37:23AM +0000, Oliver Upton wrote:
> > > On Thu, Dec 08, 2022 at 12:24:20AM +0000, Sean Christopherson wrote:
> > > > > Even still, that's just a kludge to make ucalls work. We have other
> > > > > MMIO devices (GIC distributor, for example) that work by chance since
> > > > > nothing conflicts with the constant GPAs we've selected in the tests.
> > > > > 
> > > > > I'd rather we go down the route of having an address allocator for the
> > > > > for both the VA and PA spaces to provide carveouts at runtime.
> > > > 
> > > > Aren't those two separate issues?  The PA, a.k.a. memslots space, can be solved
> > > > by allocating a dedicated memslot, i.e. doesn't need a carve.  At worst, collisions
> > > > will yield very explicit asserts, which IMO is better than whatever might go wrong
> > > > with a carve out.
> > > 
> > > Perhaps the use of the term 'carveout' wasn't right here.
> > > 
> > > What I'm suggesting is we cannot rely on KVM memslots alone to act as an
> > > allocator for the PA space. KVM can provide devices to the guest that
> > > aren't represented as memslots. If we're trying to fix PA allocations
> > > anyway, why not make it generic enough to suit the needs of things
> > > beyond ucalls?
> > 
> > One extra bit of information: in arm, IO is any access to an address (within
> > bounds) not backed by a memslot. Not the same as x86 where MMIO are writes to
> > read-only memslots.  No idea what other arches do.
> 
> I don't think that's correct, doesn't this code turn write abort on a RO memslot
> into an io_mem_abort()?  Specifically, the "(write_fault && !writable)" check will
> match, and assuming none the the edge cases in the if-statement fire, KVM will
> send the write down io_mem_abort().

You are right. In fact, page_fault_test checks precisely that: writes on
RO memslots are sent to userspace as an mmio exit. I was just referring
to the MMIO done for ucall.

Having said that, we could use ucall as writes on read-only memslots
like what x86 does.

> 
> 	gfn = fault_ipa >> PAGE_SHIFT;
> 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
> 	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
> 	write_fault = kvm_is_write_fault(vcpu);
> 	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
> 		/*
> 		 * The guest has put either its instructions or its page-tables
> 		 * somewhere it shouldn't have. Userspace won't be able to do
> 		 * anything about this (there's no syndrome for a start), so
> 		 * re-inject the abort back into the guest.
> 		 */
> 		if (is_iabt) {
> 			ret = -ENOEXEC;
> 			goto out;
> 		}
> 
> 		if (kvm_vcpu_abt_iss1tw(vcpu)) {
> 			kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
> 			ret = 1;
> 			goto out_unlock;
> 		}
> 
> 		/*
> 		 * Check for a cache maintenance operation. Since we
> 		 * ended-up here, we know it is outside of any memory
> 		 * slot. But we can't find out if that is for a device,
> 		 * or if the guest is just being stupid. The only thing
> 		 * we know for sure is that this range cannot be cached.
> 		 *
> 		 * So let's assume that the guest is just being
> 		 * cautious, and skip the instruction.
> 		 */
> 		if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) {
> 			kvm_incr_pc(vcpu);
> 			ret = 1;
> 			goto out_unlock;
> 		}
> 
> 		/*
> 		 * The IPA is reported as [MAX:12], so we need to
> 		 * complement it with the bottom 12 bits from the
> 		 * faulting VA. This is always 12 bits, irrespective
> 		 * of the page size.
> 		 */
> 		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1);
> 		ret = io_mem_abort(vcpu, fault_ipa);
> 		goto out_unlock;
> 	}
  
Sean Christopherson Dec. 9, 2022, 1:08 a.m. UTC | #8
On Thu, Dec 08, 2022, Ricardo Koller wrote:
> On Thu, Dec 08, 2022 at 07:01:57PM +0000, Sean Christopherson wrote:
> > On Thu, Dec 08, 2022, Ricardo Koller wrote:
> > > On Thu, Dec 08, 2022 at 12:37:23AM +0000, Oliver Upton wrote:
> > > > On Thu, Dec 08, 2022 at 12:24:20AM +0000, Sean Christopherson wrote:
> > > > > > Even still, that's just a kludge to make ucalls work. We have other
> > > > > > MMIO devices (GIC distributor, for example) that work by chance since
> > > > > > nothing conflicts with the constant GPAs we've selected in the tests.
> > > > > > 
> > > > > > I'd rather we go down the route of having an address allocator for the
> > > > > > for both the VA and PA spaces to provide carveouts at runtime.
> > > > > 
> > > > > Aren't those two separate issues?  The PA, a.k.a. memslots space, can be solved
> > > > > by allocating a dedicated memslot, i.e. doesn't need a carve.  At worst, collisions
> > > > > will yield very explicit asserts, which IMO is better than whatever might go wrong
> > > > > with a carve out.
> > > > 
> > > > Perhaps the use of the term 'carveout' wasn't right here.
> > > > 
> > > > What I'm suggesting is we cannot rely on KVM memslots alone to act as an
> > > > allocator for the PA space. KVM can provide devices to the guest that
> > > > aren't represented as memslots. If we're trying to fix PA allocations
> > > > anyway, why not make it generic enough to suit the needs of things
> > > > beyond ucalls?
> > > 
> > > One extra bit of information: in arm, IO is any access to an address (within
> > > bounds) not backed by a memslot. Not the same as x86 where MMIO are writes to
> > > read-only memslots.  No idea what other arches do.
> > 
> > I don't think that's correct, doesn't this code turn write abort on a RO memslot
> > into an io_mem_abort()?  Specifically, the "(write_fault && !writable)" check will
> > match, and assuming none the the edge cases in the if-statement fire, KVM will
> > send the write down io_mem_abort().
> 
> You are right. In fact, page_fault_test checks precisely that: writes on
> RO memslots are sent to userspace as an mmio exit. I was just referring
> to the MMIO done for ucall.

To clarify for others, Ricardo thought that x86 selftests were already using a
read-only memslot for ucalls, hence the confusion.

> Having said that, we could use ucall as writes on read-only memslots
> like what x86 does.

+1.  x86 currently uses I/O with a hardcoded port, but theoretically that's just
as error prone as hardcoding a GPA, it just works because x86 doesn't have any
port I/O tests.

Ugh, and that made me look at sync_regs_test.c, which does its own open coded
ucall.  That thing is probably working by dumb luck at this point.
  

Patch

diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 92d3a91153b6..95d22cfb7b41 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -609,8 +609,13 @@  static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
 				    data_size / guest_page_size,
 				    p->test_desc->data_memslot_flags);
 	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void setup_ucall(struct kvm_vm *vm)
+{
+	struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
 
-	ucall_init(vm, data_gpa + data_size);
+	ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
 }
 
 static void setup_default_handlers(struct test_desc *test)
@@ -702,6 +707,7 @@  static void run_test(enum vm_guest_mode mode, void *arg)
 	vm = ____vm_create(mode);
 	setup_memslots(vm, p);
 	kvm_vm_elf_load(vm, program_invocation_name);
+	setup_ucall(vm);
 	vcpu = vm_vcpu_add(vm, 0, guest_code);
 
 	setup_gva_maps(vm);