[v10,19/26] gunyah: vm_mgr: Add framework to add VM Functions

Message ID 20230214212531.3323284-1-quic_eberman@quicinc.com
State New
Headers
Series Drivers for Gunyah hypervisor |

Commit Message

Elliot Berman Feb. 14, 2023, 9:25 p.m. UTC
  Introduce a framework for Gunyah userspace to install VM functions. VM
functions are optional interfaces to the virtual machine. vCPUs,
ioeventfs, and irqfds are examples of such VM functions and are
implemented in subsequent patches.

A generic framework is implemented instead of individual ioctls to
create vCPUs, irqfds, etc., in order to simplify the VM manager core
implementation and allow dynamic loading of VM function modules.

Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
 Documentation/virt/gunyah/vm-manager.rst |  18 ++
 drivers/virt/gunyah/vm_mgr.c             | 240 ++++++++++++++++++++++-
 drivers/virt/gunyah/vm_mgr.h             |   3 +
 include/linux/gunyah_vm_mgr.h            |  80 ++++++++
 include/uapi/linux/gunyah.h              |  17 ++
 5 files changed, 353 insertions(+), 5 deletions(-)
 create mode 100644 include/linux/gunyah_vm_mgr.h
  

Comments

Srivatsa Vaddagiri Feb. 17, 2023, 1:23 p.m. UTC | #1
* Elliot Berman <quic_eberman@quicinc.com> [2023-02-14 13:25:30]:

> +static long gh_vm_add_function(struct gh_vm *ghvm, struct gh_fn_desc *f)
> +{
> +	struct gh_vm_function_instance *inst;
> +	void __user *argp;
> +	long r = 0;
> +
> +	if (f->arg_size > GH_FN_MAX_ARG_SIZE)
> +		return -EINVAL;
> +
> +	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
> +	if (!inst)
> +		return -ENOMEM;
> +
> +	inst->arg_size = f->arg_size;
> +	if (inst->arg_size) {
> +		inst->argp = kzalloc(inst->arg_size, GFP_KERNEL);
> +		if (!inst->arg) {

if (!inst->argp) ?


> +			r = -ENOMEM;
> +			goto free;
> +		}
  
Srivatsa Vaddagiri Feb. 21, 2023, 1:07 p.m. UTC | #2
* Elliot Berman <quic_eberman@quicinc.com> [2023-02-14 13:25:30]:

> +int __must_check gh_vm_get(struct gh_vm *ghvm)

Minor comment: 

get_gh_rm vs gh_vm_get -> can follow some consistent convention I think.

Perhaps get_gh_vm()?


> +{
> +	return kref_get_unless_zero(&ghvm->kref);
> +}
> +EXPORT_SYMBOL_GPL(gh_vm_get);
  
Srinivas Kandagatla Feb. 21, 2023, 5:58 p.m. UTC | #3
On 21/02/2023 13:07, Srivatsa Vaddagiri wrote:
> * Elliot Berman <quic_eberman@quicinc.com> [2023-02-14 13:25:30]:
> 
>> +int __must_check gh_vm_get(struct gh_vm *ghvm)
> 
> Minor comment:
> 
> get_gh_rm vs gh_vm_get -> can follow some consistent convention I think.
> 
> Perhaps get_gh_vm()?

it should be other way around

currently we have combinations of gh_vm and some other pattern, we 
should stick with one, in this case gh_vm_* or gh_rm_* makes more sense

here are all the exported symbols in gunyah.

./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_function_register);
./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_function_unregister);
./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_add_resource_ticket);
./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_remove_resource_ticket);
./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_mmio_write);
./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_add_io_handler);
./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_remove_io_handler);
./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_get);
./drivers/virt/gunyah/vm_mgr.c:EXPORT_SYMBOL_GPL(gh_vm_put);
./drivers/virt/gunyah/rsc_mgr.c:EXPORT_SYMBOL_GPL(gh_rm_notifier_register);
./drivers/virt/gunyah/rsc_mgr.c:EXPORT_SYMBOL_GPL(gh_rm_notifier_unregister);
./drivers/virt/gunyah/rsc_mgr.c:EXPORT_SYMBOL_GPL(get_gh_rm);
./drivers/virt/gunyah/rsc_mgr.c:EXPORT_SYMBOL_GPL(put_gh_rm);
./drivers/virt/gunyah/gunyah.c:EXPORT_SYMBOL_GPL(gh_api_version);
./drivers/virt/gunyah/gunyah.c:EXPORT_SYMBOL_GPL(gh_api_has_feature);
./drivers/virt/gunyah/rsc_mgr_rpc.c:EXPORT_SYMBOL_GPL(gh_rm_get_vmid);
./drivers/virt/gunyah/gunyah_platform_hooks.c:EXPORT_SYMBOL_GPL(gh_rm_platform_pre_mem_share);
./drivers/virt/gunyah/gunyah_platform_hooks.c:EXPORT_SYMBOL_GPL(gh_rm_platform_post_mem_reclaim);
./drivers/virt/gunyah/gunyah_platform_hooks.c:EXPORT_SYMBOL_GPL(gh_rm_register_platform_ops);
./drivers/virt/gunyah/gunyah_platform_hooks.c:EXPORT_SYMBOL_GPL(gh_rm_unregister_platform_ops);
./drivers/virt/gunyah/gunyah_platform_hooks.c:EXPORT_SYMBOL_GPL(devm_gh_rm_register_platform_ops);

> 
> 
>> +{
>> +	return kref_get_unless_zero(&ghvm->kref);
>> +}
>> +EXPORT_SYMBOL_GPL(gh_vm_get);
  
Srinivas Kandagatla Feb. 22, 2023, 2:08 p.m. UTC | #4
On 14/02/2023 21:25, Elliot Berman wrote:
> 
> Introduce a framework for Gunyah userspace to install VM functions. VM
> functions are optional interfaces to the virtual machine. vCPUs,
> ioeventfs, and irqfds are examples of such VM functions and are
> implemented in subsequent patches.
> 
> A generic framework is implemented instead of individual ioctls to
> create vCPUs, irqfds, etc., in order to simplify the VM manager core
> implementation and allow dynamic loading of VM function modules.
> 
> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
> ---
>   Documentation/virt/gunyah/vm-manager.rst |  18 ++
>   drivers/virt/gunyah/vm_mgr.c             | 240 ++++++++++++++++++++++-
>   drivers/virt/gunyah/vm_mgr.h             |   3 +
>   include/linux/gunyah_vm_mgr.h            |  80 ++++++++
>   include/uapi/linux/gunyah.h              |  17 ++
>   5 files changed, 353 insertions(+), 5 deletions(-)
>   create mode 100644 include/linux/gunyah_vm_mgr.h
> 
> diff --git a/Documentation/virt/gunyah/vm-manager.rst b/Documentation/virt/gunyah/vm-manager.rst
> index c0126cfeadc7..5272a6e9145c 100644
> --- a/Documentation/virt/gunyah/vm-manager.rst
> +++ b/Documentation/virt/gunyah/vm-manager.rst
> @@ -17,6 +17,24 @@ sharing userspace memory with a VM is done via the GH_VM_SET_USER_MEM_REGION
>   ioctl. The VM itself is configured to use the memory region via the
>   devicetree.
>   
> +Gunyah Functions
> +================
> +
> +Components of a Gunyah VM's configuration that need kernel configuration are
> +called "functions" and are built on top of a framework. Functions are identified
> +by a string and have some argument(s) to configure them. They are typically
> +created by the `GH_VM_ADD_FUNCTION` ioctl.
> +
> +Functions typically will always do at least one of these operations:
> +
> +1. Create resource ticket(s). Resource tickets allow a function to register
> +   itself as the client for a Gunyah resource (e.g. doorbell or vCPU) and
> +   the function is given the pointer to the `struct gunyah_resource` when the
> +   VM is starting.
> +
> +2. Register IO handler(s). IO handlers allow a function to handle stage-2 faults
> +   from the virtual machine.
> +
>   Sample Userspace VMM
>   ====================
>   
> diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
> index fa324385ade5..e9c55e7dd1b3 100644
> --- a/drivers/virt/gunyah/vm_mgr.c
> +++ b/drivers/virt/gunyah/vm_mgr.c
> @@ -6,8 +6,10 @@
>   #define pr_fmt(fmt) "gh_vm_mgr: " fmt
>   
>   #include <linux/anon_inodes.h>
> +#include <linux/compat.h>
>   #include <linux/file.h>
>   #include <linux/gunyah_rsc_mgr.h>
> +#include <linux/gunyah_vm_mgr.h>
>   #include <linux/miscdevice.h>
>   #include <linux/mm.h>
>   #include <linux/module.h>
> @@ -16,6 +18,177 @@
>   
>   #include "vm_mgr.h"
>   
> +static DEFINE_MUTEX(functions_lock);
> +static DEFINE_IDR(functions);
Why are these global? Can these be not part of struc gh_rm?
Not to mention please move idr to xarrays.

> +
> +int gh_vm_function_register(struct gh_vm_function *drv)
> +{
> +	int ret = 0;
> +
> +	if (!drv->bind || !drv->unbind)
> +		return -EINVAL;
> +
> +	mutex_lock(&functions_lock);
> +	if (idr_find(&functions, drv->type)) {
> +		ret = -EEXIST;
> +		goto out;
> +	}
> +
> +	INIT_LIST_HEAD(&drv->instances);
> +	ret = idr_alloc(&functions, drv, drv->type, drv->type + 1, GFP_KERNEL);
> +	if (ret > 0)
> +		ret = 0;
> +out:
> +	mutex_unlock(&functions_lock);
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(gh_vm_function_register);
> +
> +static void gh_vm_remove_function_instance(struct gh_vm_function_instance *inst)
> +	__must_hold(functions_lock)
> +{
> +	inst->fn->unbind(inst);
> +	list_del(&inst->vm_list);
> +	list_del(&inst->fn_list);
> +	module_put(inst->fn->mod);
> +	if (inst->arg_size)
> +		kfree(inst->argp);
> +	kfree(inst);
> +}
> +
> +void gh_vm_function_unregister(struct gh_vm_function *fn)
> +{
> +	struct gh_vm_function_instance *inst, *iter;
> +
> +	mutex_lock(&functions_lock);
> +	list_for_each_entry_safe(inst, iter, &fn->instances, fn_list)
> +		gh_vm_remove_function_instance(inst);

We should never have any instances as we have refcounted the module.

If there are any instances then its clearly a bug, as this will pull out 
function under the hood while userspace is using it.


> +	idr_remove(&functions, fn->type);
> +	mutex_unlock(&functions_lock);
> +}
> +EXPORT_SYMBOL_GPL(gh_vm_function_unregister);
> +
> +static long gh_vm_add_function(struct gh_vm *ghvm, struct gh_fn_desc *f)
> +{
> +	struct gh_vm_function_instance *inst;
> +	void __user *argp;
> +	long r = 0;
> +
> +	if (f->arg_size > GH_FN_MAX_ARG_SIZE)

lets print some useful error message to user.

> +		return -EINVAL;
> +
> +	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
> +	if (!inst)
> +		return -ENOMEM;
> +
> +	inst->arg_size = f->arg_size;
> +	if (inst->arg_size) {
> +		inst->argp = kzalloc(inst->arg_size, GFP_KERNEL);
> +		if (!inst->arg) {
> +			r = -ENOMEM;
> +			goto free;
> +		}
> +
> +		argp = is_compat_task() ? compat_ptr(f->arg) : (void __user *) f->arg;

hmm, arg is not a data pointer it is a fixed size variable (__u64 arg), 
so why are using compat_ptr() here?

you should be able to do

argp = u64_to_user_ptr(f->arg);

> +		if (copy_from_user(inst->argp, argp, f->arg_size)) {
> +			r = -EFAULT;
> +			goto free_arg;
> +		}
> +	} else {
> +		inst->arg = f->arg;
bit lost here, so, we treat the arg as both pointer and value in cases 
where size is zero.

> +	}
> +
<---
> +	mutex_lock(&functions_lock);
> +	inst->fn = idr_find(&functions, f->type);
> +	if (!inst->fn) {
> +		mutex_unlock(&functions_lock);
> +		r = request_module("ghfunc:%d", f->type);
> +		if (r)
> +			goto unlock_free;
> +
> +		mutex_lock(&functions_lock);
> +		inst->fn = idr_find(&functions, f->type);
> +	}
> +
> +	if (!inst->fn) {
> +		r = -ENOENT;
> +		goto unlock_free;
> +	}
> +
> +	if (!try_module_get(inst->fn->mod)) {
> +		r = -ENOENT;
> +		inst->fn = NULL;
> +		goto unlock_free;
> +	}
> +
--->
can we do this snippet as a gh_vm_get_function() and corresponding 
gh_vm_put_function(). that should make the code more cleaner.


> +	inst->ghvm = ghvm;
> +	inst->rm = ghvm->rm;
> +
> +	r = inst->fn->bind(inst);
> +	if (r < 0) {
> +		module_put(inst->fn->mod);
> +		goto unlock_free;
> +	}
> +
> +	list_add(&inst->vm_list, &ghvm->functions);

I guess its possible to add same functions with same argumentso to this 
list, how are we preventing this to happen?

Is it a valid usecase?

> +	list_add(&inst->fn_list, &inst->fn->instances);
> +	mutex_unlock(&functions_lock);
> +	return r;
> +unlock_free:
> +	mutex_unlock(&functions_lock);
> +free_arg:
> +	if (inst->arg_size)
> +		kfree(inst->argp);
> +free:
> +	kfree(inst);
> +	return r;
> +}
> +
> +static long gh_vm_rm_function(struct gh_vm *ghvm, struct gh_fn_desc *f)
> +{
> +	struct gh_vm_function_instance *inst, *iter;
> +	void __user *user_argp;
> +	void *argp;
> +	long r = 0;
> +
> +	r = mutex_lock_interruptible(&functions_lock);
> +	if (r)
> +		return r;
> +
> +	if (f->arg_size) {
> +		argp = kzalloc(f->arg_size, GFP_KERNEL);
> +		if (!argp) {
> +			r = -ENOMEM;
> +			goto out;
> +		}
> +
> +		user_argp = is_compat_task() ? compat_ptr(f->arg) : (void __user *) f->arg;

same comment as add;

> +		if (copy_from_user(argp, user_argp, f->arg_size)) {
> +			r = -EFAULT;
> +			kfree(argp);
> +			goto out;
> +		}
> +
> +		list_for_each_entry_safe(inst, iter, &ghvm->functions, vm_list) {
> +			if (inst->fn->type == f->type &&
> +				f->arg_size == inst->arg_size &&
> +				!memcmp(argp, inst->argp, f->arg_size))
> +				gh_vm_remove_function_instance(inst);
> +		}

leaking argp;

> +	} else {
> +		list_for_each_entry_safe(inst, iter, &ghvm->functions, vm_list) {
> +			if (inst->fn->type == f->type &&
> +				f->arg_size == inst->arg_size &&
> +				inst->arg == f->arg)
> +				gh_vm_remove_function_instance(inst);
> +		}
> +	}
> +
> +out:
> +	mutex_unlock(&functions_lock);
> +	return r;
> +}
> +
>   static int gh_vm_rm_notification_status(struct gh_vm *ghvm, void *data)
>   {
>   	struct gh_rm_vm_status_payload *payload = data;
> @@ -80,6 +253,7 @@ static void gh_vm_stop(struct gh_vm *ghvm)
>   static void gh_vm_free(struct work_struct *work)
>   {
>   	struct gh_vm *ghvm = container_of(work, struct gh_vm, free_work);
> +	struct gh_vm_function_instance *inst, *iiter;
>   	struct gh_vm_mem *mapping, *tmp;
>   	int ret;
>   
> @@ -90,7 +264,13 @@ static void gh_vm_free(struct work_struct *work)
>   		fallthrough;
>   	case GH_RM_VM_STATUS_INIT_FAILED:
>   	case GH_RM_VM_STATUS_LOAD:
> -	case GH_RM_VM_STATUS_LOAD_FAILED:
> +	case GH_RM_VM_STATUS_EXITED:
> +		mutex_lock(&functions_lock);
> +		list_for_each_entry_safe(inst, iiter, &ghvm->functions, vm_list) {
> +			gh_vm_remove_function_instance(inst);
> +		}
> +		mutex_unlock(&functions_lock);
> +
>   		mutex_lock(&ghvm->mm_lock);
>   		list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
>   			gh_vm_mem_reclaim(ghvm, mapping);
> @@ -113,6 +293,28 @@ static void gh_vm_free(struct work_struct *work)
>   	}
>   }
>   
> +static void _gh_vm_put(struct kref *kref)
> +{
> +	struct gh_vm *ghvm = container_of(kref, struct gh_vm, kref);
> +
> +	/* VM will be reset and make RM calls which can interruptible sleep.
> +	 * Defer to a work so this thread can receive signal.
> +	 */
> +	schedule_work(&ghvm->free_work);
> +}
> +
> +int __must_check gh_vm_get(struct gh_vm *ghvm)
> +{
> +	return kref_get_unless_zero(&ghvm->kref);
> +}
> +EXPORT_SYMBOL_GPL(gh_vm_get);
> +
> +void gh_vm_put(struct gh_vm *ghvm)
> +{
> +	kref_put(&ghvm->kref, _gh_vm_put);
> +}
> +EXPORT_SYMBOL_GPL(gh_vm_put);
> +
>   static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm)
>   {
>   	struct gh_vm *ghvm;
> @@ -147,6 +349,8 @@ static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm)
>   	INIT_LIST_HEAD(&ghvm->memory_mappings);
>   	init_rwsem(&ghvm->status_lock);
>   	INIT_WORK(&ghvm->free_work, gh_vm_free);
> +	kref_init(&ghvm->kref);
> +	INIT_LIST_HEAD(&ghvm->functions);
>   	ghvm->vm_status = GH_RM_VM_STATUS_LOAD;
>   
>   	return ghvm;
> @@ -291,6 +495,35 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>   		r = gh_vm_ensure_started(ghvm);
>   		break;
>   	}
> +	case GH_VM_ADD_FUNCTION: {
> +		struct gh_fn_desc *f;
> +
> +		f = kzalloc(sizeof(*f), GFP_KERNEL);
> +		if (!f)
> +			return -ENOMEM;
> +
> +		if (copy_from_user(f, argp, sizeof(*f)))
> +			return -EFAULT;
> +
> +		r = gh_vm_add_function(ghvm, f);
> +		if (r < 0)
> +			kfree(f);


we are memory leaking f here, we should free it irrespective of return 
value. or I see no reason not to use this small struct from stack.


> +		break;
> +	}
> +	case GH_VM_REMOVE_FUNCTION: {
> +		struct gh_fn_desc *f;
> +
> +		f = kzalloc(sizeof(*f), GFP_KERNEL);
> +		if (!f)
> +			return -ENOMEM;
> +
> +		if (copy_from_user(f, argp, sizeof(*f)))
> +			return -EFAULT;
> +
> +		r = gh_vm_rm_function(ghvm, f);
> +		kfree(f);
> +		break;
> +	}
>   	default:
>   		r = -ENOTTY;
>   		break;
> @@ -303,10 +536,7 @@ static int gh_vm_release(struct inode *inode, struct file *filp)
>   {
>   	struct gh_vm *ghvm = filp->private_data;
>   
> -	/* VM will be reset and make RM calls which can interruptible sleep.
> -	 * Defer to a work so this thread can receive signal.
> -	 */
> -	schedule_work(&ghvm->free_work);
> +	gh_vm_put(ghvm);
>   	return 0;
>   }
>   
> diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
> index e9cf56647cc2..4750d56c1297 100644
> --- a/drivers/virt/gunyah/vm_mgr.h
> +++ b/drivers/virt/gunyah/vm_mgr.h
> @@ -8,6 +8,7 @@
>   
>   #include <linux/gunyah_rsc_mgr.h>
>   #include <linux/list.h>
> +#include <linux/kref.h>
>   #include <linux/miscdevice.h>
>   #include <linux/mutex.h>
>   #include <linux/rwsem.h>
> @@ -44,8 +45,10 @@ struct gh_vm {
>   	struct rw_semaphore status_lock;
>   
>   	struct work_struct free_work;
> +	struct kref kref;
>   	struct mutex mm_lock;
>   	struct list_head memory_mappings;
> +	struct list_head functions;
>   };
>   
>   int gh_vm_mem_alloc(struct gh_vm *ghvm, struct gh_userspace_memory_region *region);
> diff --git a/include/linux/gunyah_vm_mgr.h b/include/linux/gunyah_vm_mgr.h
> new file mode 100644
> index 000000000000..f0a95af50b2e
> --- /dev/null
> +++ b/include/linux/gunyah_vm_mgr.h
> @@ -0,0 +1,80 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
> + */
> +
> +#ifndef _GUNYAH_VM_MGR_H
> +#define _GUNYAH_VM_MGR_H
> +
> +#include <linux/compiler_types.h>
> +#include <linux/gunyah.h>
> +#include <linux/gunyah_rsc_mgr.h>
> +#include <linux/list.h>
> +#include <linux/mod_devicetable.h>
??

> +#include <linux/notifier.h>

??

> +
> +#include <uapi/linux/gunyah.h>
> +
> +struct gh_vm;
> +
> +int __must_check gh_vm_get(struct gh_vm *ghvm);
> +void gh_vm_put(struct gh_vm *ghvm);
> +
> +struct gh_vm_function_instance;
> +struct gh_vm_function {
> +	u32 type; > +	const char *name;
> +	struct module *mod;
> +	long (*bind)(struct gh_vm_function_instance *f);
> +	void (*unbind)(struct gh_vm_function_instance *f);
> +	struct mutex instances_lock;
> +	struct list_head instances;
> +};
> +
> +/**
> + * struct gh_vm_function_instance - Represents one function instance
> + * @arg_size: size of user argument
> + * @arg: user argument to describe the function instance; arg_size is 0
> + * @argp: pointer to user argument
> + * @ghvm: Pointer to VM instance
> + * @rm: Pointer to resource manager for the VM instance
> + * @fn: The ops for the function
> + * @data: Private data for function
> + * @vm_list: for gh_vm's functions list
> + * @fn_list: for gh_vm_function's instances list
> + */
> +struct gh_vm_function_instance {
> +	size_t arg_size;
> +	union {
> +		u64 arg;
> +		void *argp;
> +	};
> +	struct gh_vm *ghvm;
> +	struct gh_rm *rm;
> +	struct gh_vm_function *fn;
> +	void *data;
> +	struct list_head vm_list;
> +	struct list_head fn_list;
Am not seeing any advantage of storing the instance in two different 
list, they look redundant to me. storing the function instances in vm 
should be good IMO.


> +};
> +
> +int gh_vm_function_register(struct gh_vm_function *f);
> +void gh_vm_function_unregister(struct gh_vm_function *f);
> +
> +#define DECLARE_GUNYAH_VM_FUNCTION(_name, _type, _bind, _unbind)	\
> +	static struct gh_vm_function _name = {		\
> +		.type = _type,						\
> +		.name = __stringify(_name),				\
> +		.mod = THIS_MODULE,					\
> +		.bind = _bind,						\
> +		.unbind = _unbind,					\
> +	};								\
> +	MODULE_ALIAS("ghfunc:"__stringify(_type))
> +
> +#define module_gunyah_vm_function(__gf)					\
> +	module_driver(__gf, gh_vm_function_register, gh_vm_function_unregister)
> +
> +#define DECLARE_GUNYAH_VM_FUNCTION_INIT(_name, _type, _bind, _unbind)	\
> +	DECLARE_GUNYAH_VM_FUNCTION(_name, _type, _bind, _unbind);	\
> +	module_gunyah_vm_function(_name)
> +
> +#endif
> diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
> index d899bba6a4c6..8df455a2a293 100644
> --- a/include/uapi/linux/gunyah.h
> +++ b/include/uapi/linux/gunyah.h
> @@ -66,4 +66,21 @@ struct gh_vm_dtb_config {
>   
>   #define GH_VM_START		_IO(GH_IOCTL_TYPE, 0x3)
>   
> +#define GH_FN_MAX_ARG_SIZE		256
> +
> +/**
> + * struct gh_fn_desc - Arguments to create a VM function
> + * @type: Type of the function. See GH_FN_* macro for supported types
> + * @arg_size: Size of argument to pass to the function

a note on max arg size  of 256 bytes would be useful.

> + * @arg: Value or pointer to argument given to the function

Treating this as value when arg_size == 0 is really confusing abi.
how about just use as arg as ptr to data along with arg_size;

--srini
> + */
> +struct gh_fn_desc {
> +	__u32 type;
> +	__u32 arg_size;
> +	__u64 arg;
> +};
> +
> +#define GH_VM_ADD_FUNCTION	_IOW(GH_IOCTL_TYPE, 0x4, struct gh_fn_desc)
> +#define GH_VM_REMOVE_FUNCTION	_IOW(GH_IOCTL_TYPE, 0x7, struct gh_fn_desc)

Do you have an example of how add and rm ioctls are used w.r.t to arg, i 
see that we check correcteness of arg in between add and remove.

--srini
> +
>   #endif
  
Elliot Berman Feb. 24, 2023, 11:44 p.m. UTC | #5
On 2/22/2023 6:08 AM, Srinivas Kandagatla wrote:
> 
> 
> On 14/02/2023 21:25, Elliot Berman wrote:
>>
>> Introduce a framework for Gunyah userspace to install VM functions. VM
>> functions are optional interfaces to the virtual machine. vCPUs,
>> ioeventfs, and irqfds are examples of such VM functions and are
>> implemented in subsequent patches.
>>
>> A generic framework is implemented instead of individual ioctls to
>> create vCPUs, irqfds, etc., in order to simplify the VM manager core
>> implementation and allow dynamic loading of VM function modules.
>>
>> Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
>> ---
>>   Documentation/virt/gunyah/vm-manager.rst |  18 ++
>>   drivers/virt/gunyah/vm_mgr.c             | 240 ++++++++++++++++++++++-
>>   drivers/virt/gunyah/vm_mgr.h             |   3 +
>>   include/linux/gunyah_vm_mgr.h            |  80 ++++++++
>>   include/uapi/linux/gunyah.h              |  17 ++
>>   5 files changed, 353 insertions(+), 5 deletions(-)
>>   create mode 100644 include/linux/gunyah_vm_mgr.h
>>
>> diff --git a/Documentation/virt/gunyah/vm-manager.rst 
>> b/Documentation/virt/gunyah/vm-manager.rst
>> index c0126cfeadc7..5272a6e9145c 100644
>> --- a/Documentation/virt/gunyah/vm-manager.rst
>> +++ b/Documentation/virt/gunyah/vm-manager.rst
>> @@ -17,6 +17,24 @@ sharing userspace memory with a VM is done via the 
>> GH_VM_SET_USER_MEM_REGION
>>   ioctl. The VM itself is configured to use the memory region via the
>>   devicetree.
>> +Gunyah Functions
>> +================
>> +
>> +Components of a Gunyah VM's configuration that need kernel 
>> configuration are
>> +called "functions" and are built on top of a framework. Functions are 
>> identified
>> +by a string and have some argument(s) to configure them. They are 
>> typically
>> +created by the `GH_VM_ADD_FUNCTION` ioctl.
>> +
>> +Functions typically will always do at least one of these operations:
>> +
>> +1. Create resource ticket(s). Resource tickets allow a function to 
>> register
>> +   itself as the client for a Gunyah resource (e.g. doorbell or vCPU) 
>> and
>> +   the function is given the pointer to the `struct gunyah_resource` 
>> when the
>> +   VM is starting.
>> +
>> +2. Register IO handler(s). IO handlers allow a function to handle 
>> stage-2 faults
>> +   from the virtual machine.
>> +
>>   Sample Userspace VMM
>>   ====================
>> diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
>> index fa324385ade5..e9c55e7dd1b3 100644
>> --- a/drivers/virt/gunyah/vm_mgr.c
>> +++ b/drivers/virt/gunyah/vm_mgr.c
>> @@ -6,8 +6,10 @@
>>   #define pr_fmt(fmt) "gh_vm_mgr: " fmt
>>   #include <linux/anon_inodes.h>
>> +#include <linux/compat.h>
>>   #include <linux/file.h>
>>   #include <linux/gunyah_rsc_mgr.h>
>> +#include <linux/gunyah_vm_mgr.h>
>>   #include <linux/miscdevice.h>
>>   #include <linux/mm.h>
>>   #include <linux/module.h>
>> @@ -16,6 +18,177 @@
>>   #include "vm_mgr.h"
>> +static DEFINE_MUTEX(functions_lock);
>> +static DEFINE_IDR(functions);
> Why are these global? Can these be not part of struc gh_rm?

If I make the list part of gh_rm, the core gunyah framework would need 
to know and be dependent on all possible VM functions. This prevents 
CONFIG_GUNYAH=y and CONFIG_GUNYAH_IRQFD=m (or any other function from 
being enabled as a module). I also see a 2-way dependency when enabled 
all enabled as modules. This approach seems vastly simpler.

> Not to mention please move idr to xarrays.
> 

Done.

>> +
>> +int gh_vm_function_register(struct gh_vm_function *drv)
>> +{
>> +    int ret = 0;
>> +
>> +    if (!drv->bind || !drv->unbind)
>> +        return -EINVAL;
>> +
>> +    mutex_lock(&functions_lock);
>> +    if (idr_find(&functions, drv->type)) {
>> +        ret = -EEXIST;
>> +        goto out;
>> +    }
>> +
>> +    INIT_LIST_HEAD(&drv->instances);
>> +    ret = idr_alloc(&functions, drv, drv->type, drv->type + 1, 
>> GFP_KERNEL);
>> +    if (ret > 0)
>> +        ret = 0;
>> +out:
>> +    mutex_unlock(&functions_lock);
>> +    return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(gh_vm_function_register);
>> +
>> +static void gh_vm_remove_function_instance(struct 
>> gh_vm_function_instance *inst)
>> +    __must_hold(functions_lock)
>> +{
>> +    inst->fn->unbind(inst);
>> +    list_del(&inst->vm_list);
>> +    list_del(&inst->fn_list);
>> +    module_put(inst->fn->mod);
>> +    if (inst->arg_size)
>> +        kfree(inst->argp);
>> +    kfree(inst);
>> +}
>> +
>> +void gh_vm_function_unregister(struct gh_vm_function *fn)
>> +{
>> +    struct gh_vm_function_instance *inst, *iter;
>> +
>> +    mutex_lock(&functions_lock);
>> +    list_for_each_entry_safe(inst, iter, &fn->instances,fn_list)
>> +        gh_vm_remove_function_instance(inst);
> 
> We should never have any instances as we have refcounted the module.
> 
> If there are any instances then its clearly a bug, as this will pull out 
> function under the hood while userspace is using it.
> 
> 

Done.

>> +    idr_remove(&functions, fn->type);
>> +    mutex_unlock(&functions_lock);
>> +}
>> +EXPORT_SYMBOL_GPL(gh_vm_function_unregister);
>> +
>> +static long gh_vm_add_function(struct gh_vm *ghvm, struct gh_fn_desc *f)
>> +{
>> +    struct gh_vm_function_instance *inst;
>> +    void __user *argp;
>> +    long r = 0;
>> +
>> +    if (f->arg_size > GH_FN_MAX_ARG_SIZE)
> 
> lets print some useful error message to user.
> 
>> +        return -EINVAL;
>> +
>> +    inst = kzalloc(sizeof(*inst), GFP_KERNEL);
>> +    if (!inst)
>> +        return -ENOMEM;
>> +
>> +    inst->arg_size = f->arg_size;
>> +    if (inst->arg_size) {
>> +        inst->argp = kzalloc(inst->arg_size, GFP_KERNEL);
>> +        if (!inst->arg) {
>> +            r = -ENOMEM;
>> +            gotofree;
>> +        }
>> +
>> +        argp = is_compat_task() ? compat_ptr(f->arg) : (void __user 
>> *) f->arg;
> 
> hmm, arg is not a data pointer it is a fixed size variable (__u64 arg), 
> so why are using compat_ptr() here?
> 
> you should be able to do
> 
> argp = u64_to_user_ptr(f->arg);
> 

Done.

>> +        if (copy_from_user(inst->argp, argp, f->arg_size)) {
>> +            r = -EFAULT;
>> +            gotofree_arg;
>> +        }
>> +    } else {
>> +        inst->arg = f->arg;
> bit lost here, so, we treat the arg as both pointer and value in cases 
> where size is zero.
> 

That's correct -- arg is value in cases where size is zero. It might be 
a bit too strange, and I'm not opposed to always treating arg as pointer 
when size >= 0 and none of the "arg is value". In vCPU and several other 
possible functions we have (presently) downstream, the only argument a 
only a 32-bit integer label. I thought it would be good to optimize this 
path.

>> +    }
>> +
> <---
>> +    mutex_lock(&functions_lock);
>> +    inst->fn = idr_find(&functions, f->type);
>> +    if (!inst->fn) {
>> +        mutex_unlock(&functions_lock);
>> +        r = request_module("ghfunc:%d", f->type);
>> +        if (r)
>> +            gotounlock_free;
>> +
>> +        mutex_lock(&functions_lock);
>> +        inst->fn = idr_find(&functions, f->type);
>> +    }
>> +
>> +    if (!inst->fn) {
>> +        r = -ENOENT;
>> +        goto unlock_free;
>> +    }
>> +
>> +    if (!try_module_get(inst->fn->mod)) {
>> +        r = -ENOENT;
>> +        inst->fn = NULL;
>> +        goto unlock_free;
>> +    }
>> +
> --->
> can we do this snippet as a gh_vm_get_function() and corresponding 
> gh_vm_put_function(). that should make the code more cleaner.
> 

Done.

> 
>> +    inst->ghvm = ghvm;
>> +    inst->rm = ghvm->rm;
>> +
>> +    r = inst->fn->bind(inst);
>> +    if (r < 0) {
>> +        module_put(inst->fn->mod);
>> +        goto unlock_free;
>> +    }
>> +
>> +    list_add(&inst->vm_list, &ghvm->functions);
> 
> I guess its possible to add same functions with same argumentso to this 
> list, how are we preventing this to happen?
> 
> Is it a valid usecase?
> 

I'm not able to think of a function where this is valid. This is handled 
today because the ->bind() callback returns an error. This happens if 
the resource ticket was already created, e.g. same vCPU is requested twice.

This should be handled at function level because two arguments could be 
different but request the same resource. For instance, two 
GH_VM_ADD_FUNCTION to create irqfd with same doorbell. One call is with 
LEVEL flag and the other doesn't have the flag set. Only once we try to 
register the doorbell resource ticket do we realize that 2nd 
GH_VM_ADD_FUNCTION should fail.

>> +    list_add(&inst->fn_list, &inst->fn->instances);
>> +    mutex_unlock(&functions_lock);
>> +    return r;
>> +unlock_free:
>> +    mutex_unlock(&functions_lock);
>> +free_arg:
>> +    if (inst->arg_size)
>> +        kfree(inst->argp);
>> +free:
>> +    kfree(inst);
>> +    return r;
>> +}
>> +
>> +static long gh_vm_rm_function(struct gh_vm *ghvm, struct gh_fn_desc *f)
>> +{
>> +    struct gh_vm_function_instance *inst, *iter;
>> +    void __user *user_argp;
>> +    void *argp;
>> +    long r = 0;
>> +
>> +    r = mutex_lock_interruptible(&functions_lock);
>> +    if (r)
>> +        return r;
>> +
>> +    if (f->arg_size) {
>> +        argp = kzalloc(f->arg_size, GFP_KERNEL);
>> +        if (!argp) {
>> +            r = -ENOMEM;
>> +            gotoout;
>> +        }
>> +
>> +        user_argp = is_compat_task() ? compat_ptr(f->arg) : (void 
>> __user *) f->arg;
> 
> same comment as add;
> 
>> +        if (copy_from_user(argp, user_argp, f->arg_size)) {
>> +            r = -EFAULT;
>> +            kfree(argp);
>> +            gotoout;
>> +        }
>> +
>> +        list_for_each_entry_safe(inst, iter, &ghvm->functions, 
>> vm_list) {
>> +            if (inst->fn->type == f->type &&
>> +                f->arg_size == inst->arg_size &&
>> +                !memcmp(argp, inst->argp, f->arg_size))
>> +                gh_vm_remove_function_instance(inst);
>> +        }
> 
> leaking argp;
> 

Done.

>> +    } else {
>> +        list_for_each_entry_safe(inst, iter, &ghvm->functions, 
>> vm_list) {
>> +            if (inst->fn->type == f->type &&
>> +                f->arg_size == inst->arg_size &&
>> +                inst->arg == f->arg)
>> +                gh_vm_remove_function_instance(inst);
>> +        }
>> +    }
>> +
>> +out:
>> +    mutex_unlock(&functions_lock);
>> +    return r;
>> +}
>> +
>>   static int gh_vm_rm_notification_status(struct gh_vm *ghvm, void *data)
>>   {
>>       struct gh_rm_vm_status_payload *payload = data;
>> @@ -80,6 +253,7 @@ static void gh_vm_stop(struct gh_vm *ghvm)
>>   static void gh_vm_free(struct work_struct *work)
>>   {
>>       struct gh_vm *ghvm = container_of(work,struct gh_vm, free_work);
>> +    struct gh_vm_function_instance *inst, *iiter;
>>       struct gh_vm_mem *mapping, *tmp;
>>       int ret;
>> @@ -90,7 +264,13 @@ static void gh_vm_free(struct work_struct *work)
>>           fallthrough;
>>       case GH_RM_VM_STATUS_INIT_FAILED:
>>       case GH_RM_VM_STATUS_LOAD:
>> -    case GH_RM_VM_STATUS_LOAD_FAILED:
>> +    case GH_RM_VM_STATUS_EXITED:
>> +        mutex_lock(&functions_lock);
>> +        list_for_each_entry_safe(inst, iiter, &ghvm->functions, 
>> vm_list) {
>> +            gh_vm_remove_function_instance(inst);
>> +        }
>> +        mutex_unlock(&functions_lock);
>> +
>>           mutex_lock(&ghvm->mm_lock);
>>           list_for_each_entry_safe(mapping, tmp, 
>> &ghvm->memory_mappings, list) {
>>               gh_vm_mem_reclaim(ghvm, mapping);
>> @@ -113,6 +293,28 @@ static void gh_vm_free(struct work_struct *work)
>>       }
>>   }
>> +static void _gh_vm_put(struct kref *kref)
>> +{
>> +    struct gh_vm *ghvm = container_of(kref, struct gh_vm, kref);
>> +
>> +    /* VM will be reset and make RM calls which can interruptible sleep.
>> +     * Defer to a work so this thread can receive signal.
>> +     */
>> +    schedule_work(&ghvm->free_work);
>> +}
>> +
>> +int __must_check gh_vm_get(struct gh_vm *ghvm)
>> +{
>> +    return kref_get_unless_zero(&ghvm->kref);
>> +}
>> +EXPORT_SYMBOL_GPL(gh_vm_get);
>> +
>> +void gh_vm_put(struct gh_vm *ghvm)
>> +{
>> +    kref_put(&ghvm->kref, _gh_vm_put);
>> +}
>> +EXPORT_SYMBOL_GPL(gh_vm_put);
>> +
>>   static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm)
>>   {
>>       struct gh_vm *ghvm;
>> @@ -147,6 +349,8 @@ static __must_check struct gh_vm 
>> *gh_vm_alloc(struct gh_rm *rm)
>>       INIT_LIST_HEAD(&ghvm->memory_mappings);
>>       init_rwsem(&ghvm->status_lock);
>>       INIT_WORK(&ghvm->free_work, gh_vm_free);
>> +    kref_init(&ghvm->kref);
>> +    INIT_LIST_HEAD(&ghvm->functions);
>>       ghvm->vm_status = GH_RM_VM_STATUS_LOAD;
>>       return ghvm;
>> @@ -291,6 +495,35 @@ static long gh_vm_ioctl(struct file *filp, 
>> unsigned int cmd, unsigned long arg)
>>           r = gh_vm_ensure_started(ghvm);
>>           break;
>>       }
>> +    case GH_VM_ADD_FUNCTION: {
>> +        struct gh_fn_desc *f;
>> +
>> +        f = kzalloc(sizeof(*f), GFP_KERNEL);
>> +        if (!f)
>> +            return -ENOMEM;
>> +
>> +        if (copy_from_user(f, argp, sizeof(*f)))
>> +            return -EFAULT;
>> +
>> +        r = gh_vm_add_function(ghvm, f);
>> +        if (r < 0)
>> +            kfree(f);
> 
> 
> we are memory leaking f here, we should free it irrespective of return 
> value. or I see no reason not to use this small struct from stack.
> 

I can copy to stack directly.

> 
>> +        break;
>> +    }
>> +    case GH_VM_REMOVE_FUNCTION: {
>> +        struct gh_fn_desc *f;
>> +
>> +        f = kzalloc(sizeof(*f), GFP_KERNEL);
>> +        if (!f)
>> +            return -ENOMEM;
>> +
>> +        if (copy_from_user(f, argp, sizeof(*f)))
>> +            return -EFAULT;
>> +
>> +        r = gh_vm_rm_function(ghvm, f);
>> +        kfree(f);
>> +        break;
>> +    }
>>       default:
>>           r = -ENOTTY;
>>           break;
>> @@ -303,10 +536,7 @@ static int gh_vm_release(struct inode *inode, 
>> struct file *filp)
>>   {
>>       struct gh_vm *ghvm = filp->private_data;
>> -    /* VM will be reset and make RM calls which can interruptible sleep.
>> -     * Defer to a work so this thread can receive signal.
>> -     */
>> -    schedule_work(&ghvm->free_work);
>> +    gh_vm_put(ghvm);
>>       return 0;
>>   }
>> diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
>> index e9cf56647cc2..4750d56c1297 100644
>> --- a/drivers/virt/gunyah/vm_mgr.h
>> +++ b/drivers/virt/gunyah/vm_mgr.h
>> @@ -8,6 +8,7 @@
>>   #include <linux/gunyah_rsc_mgr.h>
>>   #include <linux/list.h>
>> +#include <linux/kref.h>
>>   #include <linux/miscdevice.h>
>>   #include <linux/mutex.h>
>>   #include <linux/rwsem.h>
>> @@ -44,8 +45,10 @@ struct gh_vm {
>>       struct rw_semaphore status_lock;
>>       struct work_struct free_work;
>> +    struct kref kref;
>>       struct mutex mm_lock;
>>       struct list_head memory_mappings;
>> +    struct list_head functions;
>>   };
>>   int gh_vm_mem_alloc(struct gh_vm *ghvm, struct 
>> gh_userspace_memory_region *region);
>> diff --git a/include/linux/gunyah_vm_mgr.h 
>> b/include/linux/gunyah_vm_mgr.h
>> new file mode 100644
>> index 000000000000..f0a95af50b2e
>> --- /dev/null
>> +++ b/include/linux/gunyah_vm_mgr.h
>> @@ -0,0 +1,80 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All 
>> rights reserved.
>> + */
>> +
>> +#ifndef _GUNYAH_VM_MGR_H
>> +#define _GUNYAH_VM_MGR_H
>> +
>> +#include <linux/compiler_types.h>
>> +#include <linux/gunyah.h>
>> +#include <linux/gunyah_rsc_mgr.h>
>> +#include <linux/list.h>
>> +#include <linux/mod_devicetable.h>
> ??
> 
>> +#include <linux/notifier.h>
> 
> ??
> 
>> +
>> +#include <uapi/linux/gunyah.h>
>> +
>> +struct gh_vm;
>> +
>> +int __must_check gh_vm_get(struct gh_vm *ghvm);
>> +void gh_vm_put(struct gh_vm *ghvm);
>> +
>> +struct gh_vm_function_instance;
>> +struct gh_vm_function {
>> +    u32 type; > +    const char *name;
>> +    struct module *mod;
>> +    long (*bind)(struct gh_vm_function_instance *f);
>> +    void (*unbind)(struct gh_vm_function_instance *f);
>> +    struct mutex instances_lock;
>> +    struct list_head instances;
>> +};
>> +
>> +/**
>> + * struct gh_vm_function_instance - Represents one function instance
>> + * @arg_size: size of user argument
>> + * @arg: user argument to describe the function instance; arg_size is 0
>> + * @argp: pointer to user argument
>> + * @ghvm: Pointer to VM instance
>> + * @rm: Pointer to resource manager for the VM instance
>> + * @fn: The ops for the function
>> + * @data: Private data for function
>> + * @vm_list: for gh_vm's functions list
>> + * @fn_list: for gh_vm_function's instances list
>> + */
>> +struct gh_vm_function_instance {
>> +    size_t arg_size;
>> +    union {
>> +        u64 arg;
>> +        void *argp;
>> +    };
>> +    struct gh_vm *ghvm;
>> +    struct gh_rm *rm;
>> +    struct gh_vm_function *fn;
>> +    void *data;
>> +    struct list_head vm_list;
>> +    struct list_head fn_list;
> Am not seeing any advantage of storing the instance in two different 
> list, they look redundant to me. storing the function instances in vm 
> should be good IMO.
> 

I was concerned about removal of function outside of module_removal, but 
we don't have reason/ability to do it.

> 
>> +};
>> +
>> +int gh_vm_function_register(struct gh_vm_function *f);
>> +void gh_vm_function_unregister(struct gh_vm_function *f);
>> +
>> +#define DECLARE_GUNYAH_VM_FUNCTION(_name, _type, _bind, _unbind)    \
>> +    static struct gh_vm_function _name = {        \
>> +        .type = _type,                        \
>> +        .name = __stringify(_name),                \
>> +        .mod = THIS_MODULE,                    \
>> +        .bind = _bind,                        \
>> +        .unbind = _unbind,                    \
>> +    };                                \
>> +    MODULE_ALIAS("ghfunc:"__stringify(_type))
>> +
>> +#define module_gunyah_vm_function(__gf)                    \
>> +    module_driver(__gf, gh_vm_function_register, 
>> gh_vm_function_unregister)
>> +
>> +#define DECLARE_GUNYAH_VM_FUNCTION_INIT(_name, _type, _bind, 
>> _unbind)    \
>> +    DECLARE_GUNYAH_VM_FUNCTION(_name, _type, _bind, _unbind);    \
>> +    module_gunyah_vm_function(_name)
>> +
>> +#endif
>> diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
>> index d899bba6a4c6..8df455a2a293 100644
>> --- a/include/uapi/linux/gunyah.h
>> +++ b/include/uapi/linux/gunyah.h
>> @@ -66,4 +66,21 @@ struct gh_vm_dtb_config {
>>   #define GH_VM_START        _IO(GH_IOCTL_TYPE, 0x3)
>> +#define GH_FN_MAX_ARG_SIZE        256
>> +
>> +/**
>> + * struct gh_fn_desc - Arguments to create a VM function
>> + * @type: Type of the function. See GH_FN_* macro for supported types
>> + * @arg_size: Size of argument to pass to the function
> 
> a note on max arg size  of 256 bytes would be useful.
> 
>> + * @arg: Value or pointer to argument given to the function
> 
> Treating this as value when arg_size == 0 is really confusing abi.
> how about just use as arg as ptr to data along with arg_size;
> 
> --srini
>> + */
>> +struct gh_fn_desc {
>> +    __u32 type;
>> +    __u32 arg_size;
>> +    __u64 arg;
>> +};
>> +
>> +#define GH_VM_ADD_FUNCTION    _IOW(GH_IOCTL_TYPE, 0x4, struct 
>> gh_fn_desc)
>> +#define GH_VM_REMOVE_FUNCTION    _IOW(GH_IOCTL_TYPE, 0x7, struct 
>> gh_fn_desc)
> 
> Do you have an example of how add and rm ioctls are used w.r.t to arg, i 
> see that we check correcteness of arg in between add and remove.
> 
> --srini
>> +
>>   #endif
  

Patch

diff --git a/Documentation/virt/gunyah/vm-manager.rst b/Documentation/virt/gunyah/vm-manager.rst
index c0126cfeadc7..5272a6e9145c 100644
--- a/Documentation/virt/gunyah/vm-manager.rst
+++ b/Documentation/virt/gunyah/vm-manager.rst
@@ -17,6 +17,24 @@  sharing userspace memory with a VM is done via the GH_VM_SET_USER_MEM_REGION
 ioctl. The VM itself is configured to use the memory region via the
 devicetree.
 
+Gunyah Functions
+================
+
+Components of a Gunyah VM's configuration that need kernel configuration are
+called "functions" and are built on top of a framework. Functions are identified
+by a string and have some argument(s) to configure them. They are typically
+created by the `GH_VM_ADD_FUNCTION` ioctl.
+
+Functions typically will always do at least one of these operations:
+
+1. Create resource ticket(s). Resource tickets allow a function to register
+   itself as the client for a Gunyah resource (e.g. doorbell or vCPU) and
+   the function is given the pointer to the `struct gunyah_resource` when the
+   VM is starting.
+
+2. Register IO handler(s). IO handlers allow a function to handle stage-2 faults
+   from the virtual machine.
+
 Sample Userspace VMM
 ====================
 
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
index fa324385ade5..e9c55e7dd1b3 100644
--- a/drivers/virt/gunyah/vm_mgr.c
+++ b/drivers/virt/gunyah/vm_mgr.c
@@ -6,8 +6,10 @@ 
 #define pr_fmt(fmt) "gh_vm_mgr: " fmt
 
 #include <linux/anon_inodes.h>
+#include <linux/compat.h>
 #include <linux/file.h>
 #include <linux/gunyah_rsc_mgr.h>
+#include <linux/gunyah_vm_mgr.h>
 #include <linux/miscdevice.h>
 #include <linux/mm.h>
 #include <linux/module.h>
@@ -16,6 +18,177 @@ 
 
 #include "vm_mgr.h"
 
+static DEFINE_MUTEX(functions_lock);
+static DEFINE_IDR(functions);
+
+int gh_vm_function_register(struct gh_vm_function *drv)
+{
+	int ret = 0;
+
+	if (!drv->bind || !drv->unbind)
+		return -EINVAL;
+
+	mutex_lock(&functions_lock);
+	if (idr_find(&functions, drv->type)) {
+		ret = -EEXIST;
+		goto out;
+	}
+
+	INIT_LIST_HEAD(&drv->instances);
+	ret = idr_alloc(&functions, drv, drv->type, drv->type + 1, GFP_KERNEL);
+	if (ret > 0)
+		ret = 0;
+out:
+	mutex_unlock(&functions_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(gh_vm_function_register);
+
+static void gh_vm_remove_function_instance(struct gh_vm_function_instance *inst)
+	__must_hold(functions_lock)
+{
+	inst->fn->unbind(inst);
+	list_del(&inst->vm_list);
+	list_del(&inst->fn_list);
+	module_put(inst->fn->mod);
+	if (inst->arg_size)
+		kfree(inst->argp);
+	kfree(inst);
+}
+
+void gh_vm_function_unregister(struct gh_vm_function *fn)
+{
+	struct gh_vm_function_instance *inst, *iter;
+
+	mutex_lock(&functions_lock);
+	list_for_each_entry_safe(inst, iter, &fn->instances, fn_list)
+		gh_vm_remove_function_instance(inst);
+	idr_remove(&functions, fn->type);
+	mutex_unlock(&functions_lock);
+}
+EXPORT_SYMBOL_GPL(gh_vm_function_unregister);
+
+static long gh_vm_add_function(struct gh_vm *ghvm, struct gh_fn_desc *f)
+{
+	struct gh_vm_function_instance *inst;
+	void __user *argp;
+	long r = 0;
+
+	if (f->arg_size > GH_FN_MAX_ARG_SIZE)
+		return -EINVAL;
+
+	inst = kzalloc(sizeof(*inst), GFP_KERNEL);
+	if (!inst)
+		return -ENOMEM;
+
+	inst->arg_size = f->arg_size;
+	if (inst->arg_size) {
+		inst->argp = kzalloc(inst->arg_size, GFP_KERNEL);
+		if (!inst->arg) {
+			r = -ENOMEM;
+			goto free;
+		}
+
+		argp = is_compat_task() ? compat_ptr(f->arg) : (void __user *) f->arg;
+		if (copy_from_user(inst->argp, argp, f->arg_size)) {
+			r = -EFAULT;
+			goto free_arg;
+		}
+	} else {
+		inst->arg = f->arg;
+	}
+
+	mutex_lock(&functions_lock);
+	inst->fn = idr_find(&functions, f->type);
+	if (!inst->fn) {
+		mutex_unlock(&functions_lock);
+		r = request_module("ghfunc:%d", f->type);
+		if (r)
+			goto unlock_free;
+
+		mutex_lock(&functions_lock);
+		inst->fn = idr_find(&functions, f->type);
+	}
+
+	if (!inst->fn) {
+		r = -ENOENT;
+		goto unlock_free;
+	}
+
+	if (!try_module_get(inst->fn->mod)) {
+		r = -ENOENT;
+		inst->fn = NULL;
+		goto unlock_free;
+	}
+
+	inst->ghvm = ghvm;
+	inst->rm = ghvm->rm;
+
+	r = inst->fn->bind(inst);
+	if (r < 0) {
+		module_put(inst->fn->mod);
+		goto unlock_free;
+	}
+
+	list_add(&inst->vm_list, &ghvm->functions);
+	list_add(&inst->fn_list, &inst->fn->instances);
+	mutex_unlock(&functions_lock);
+	return r;
+unlock_free:
+	mutex_unlock(&functions_lock);
+free_arg:
+	if (inst->arg_size)
+		kfree(inst->argp);
+free:
+	kfree(inst);
+	return r;
+}
+
+static long gh_vm_rm_function(struct gh_vm *ghvm, struct gh_fn_desc *f)
+{
+	struct gh_vm_function_instance *inst, *iter;
+	void __user *user_argp;
+	void *argp;
+	long r = 0;
+
+	r = mutex_lock_interruptible(&functions_lock);
+	if (r)
+		return r;
+
+	if (f->arg_size) {
+		argp = kzalloc(f->arg_size, GFP_KERNEL);
+		if (!argp) {
+			r = -ENOMEM;
+			goto out;
+		}
+
+		user_argp = is_compat_task() ? compat_ptr(f->arg) : (void __user *) f->arg;
+		if (copy_from_user(argp, user_argp, f->arg_size)) {
+			r = -EFAULT;
+			kfree(argp);
+			goto out;
+		}
+
+		list_for_each_entry_safe(inst, iter, &ghvm->functions, vm_list) {
+			if (inst->fn->type == f->type &&
+				f->arg_size == inst->arg_size &&
+				!memcmp(argp, inst->argp, f->arg_size))
+				gh_vm_remove_function_instance(inst);
+		}
+	} else {
+		list_for_each_entry_safe(inst, iter, &ghvm->functions, vm_list) {
+			if (inst->fn->type == f->type &&
+				f->arg_size == inst->arg_size &&
+				inst->arg == f->arg)
+				gh_vm_remove_function_instance(inst);
+		}
+	}
+
+out:
+	mutex_unlock(&functions_lock);
+	return r;
+}
+
 static int gh_vm_rm_notification_status(struct gh_vm *ghvm, void *data)
 {
 	struct gh_rm_vm_status_payload *payload = data;
@@ -80,6 +253,7 @@  static void gh_vm_stop(struct gh_vm *ghvm)
 static void gh_vm_free(struct work_struct *work)
 {
 	struct gh_vm *ghvm = container_of(work, struct gh_vm, free_work);
+	struct gh_vm_function_instance *inst, *iiter;
 	struct gh_vm_mem *mapping, *tmp;
 	int ret;
 
@@ -90,7 +264,13 @@  static void gh_vm_free(struct work_struct *work)
 		fallthrough;
 	case GH_RM_VM_STATUS_INIT_FAILED:
 	case GH_RM_VM_STATUS_LOAD:
-	case GH_RM_VM_STATUS_LOAD_FAILED:
+	case GH_RM_VM_STATUS_EXITED:
+		mutex_lock(&functions_lock);
+		list_for_each_entry_safe(inst, iiter, &ghvm->functions, vm_list) {
+			gh_vm_remove_function_instance(inst);
+		}
+		mutex_unlock(&functions_lock);
+
 		mutex_lock(&ghvm->mm_lock);
 		list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
 			gh_vm_mem_reclaim(ghvm, mapping);
@@ -113,6 +293,28 @@  static void gh_vm_free(struct work_struct *work)
 	}
 }
 
+static void _gh_vm_put(struct kref *kref)
+{
+	struct gh_vm *ghvm = container_of(kref, struct gh_vm, kref);
+
+	/* VM will be reset and make RM calls which can interruptible sleep.
+	 * Defer to a work so this thread can receive signal.
+	 */
+	schedule_work(&ghvm->free_work);
+}
+
+int __must_check gh_vm_get(struct gh_vm *ghvm)
+{
+	return kref_get_unless_zero(&ghvm->kref);
+}
+EXPORT_SYMBOL_GPL(gh_vm_get);
+
+void gh_vm_put(struct gh_vm *ghvm)
+{
+	kref_put(&ghvm->kref, _gh_vm_put);
+}
+EXPORT_SYMBOL_GPL(gh_vm_put);
+
 static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm)
 {
 	struct gh_vm *ghvm;
@@ -147,6 +349,8 @@  static __must_check struct gh_vm *gh_vm_alloc(struct gh_rm *rm)
 	INIT_LIST_HEAD(&ghvm->memory_mappings);
 	init_rwsem(&ghvm->status_lock);
 	INIT_WORK(&ghvm->free_work, gh_vm_free);
+	kref_init(&ghvm->kref);
+	INIT_LIST_HEAD(&ghvm->functions);
 	ghvm->vm_status = GH_RM_VM_STATUS_LOAD;
 
 	return ghvm;
@@ -291,6 +495,35 @@  static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		r = gh_vm_ensure_started(ghvm);
 		break;
 	}
+	case GH_VM_ADD_FUNCTION: {
+		struct gh_fn_desc *f;
+
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (!f)
+			return -ENOMEM;
+
+		if (copy_from_user(f, argp, sizeof(*f)))
+			return -EFAULT;
+
+		r = gh_vm_add_function(ghvm, f);
+		if (r < 0)
+			kfree(f);
+		break;
+	}
+	case GH_VM_REMOVE_FUNCTION: {
+		struct gh_fn_desc *f;
+
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (!f)
+			return -ENOMEM;
+
+		if (copy_from_user(f, argp, sizeof(*f)))
+			return -EFAULT;
+
+		r = gh_vm_rm_function(ghvm, f);
+		kfree(f);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 		break;
@@ -303,10 +536,7 @@  static int gh_vm_release(struct inode *inode, struct file *filp)
 {
 	struct gh_vm *ghvm = filp->private_data;
 
-	/* VM will be reset and make RM calls which can interruptible sleep.
-	 * Defer to a work so this thread can receive signal.
-	 */
-	schedule_work(&ghvm->free_work);
+	gh_vm_put(ghvm);
 	return 0;
 }
 
diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
index e9cf56647cc2..4750d56c1297 100644
--- a/drivers/virt/gunyah/vm_mgr.h
+++ b/drivers/virt/gunyah/vm_mgr.h
@@ -8,6 +8,7 @@ 
 
 #include <linux/gunyah_rsc_mgr.h>
 #include <linux/list.h>
+#include <linux/kref.h>
 #include <linux/miscdevice.h>
 #include <linux/mutex.h>
 #include <linux/rwsem.h>
@@ -44,8 +45,10 @@  struct gh_vm {
 	struct rw_semaphore status_lock;
 
 	struct work_struct free_work;
+	struct kref kref;
 	struct mutex mm_lock;
 	struct list_head memory_mappings;
+	struct list_head functions;
 };
 
 int gh_vm_mem_alloc(struct gh_vm *ghvm, struct gh_userspace_memory_region *region);
diff --git a/include/linux/gunyah_vm_mgr.h b/include/linux/gunyah_vm_mgr.h
new file mode 100644
index 000000000000..f0a95af50b2e
--- /dev/null
+++ b/include/linux/gunyah_vm_mgr.h
@@ -0,0 +1,80 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef _GUNYAH_VM_MGR_H
+#define _GUNYAH_VM_MGR_H
+
+#include <linux/compiler_types.h>
+#include <linux/gunyah.h>
+#include <linux/gunyah_rsc_mgr.h>
+#include <linux/list.h>
+#include <linux/mod_devicetable.h>
+#include <linux/notifier.h>
+
+#include <uapi/linux/gunyah.h>
+
+struct gh_vm;
+
+int __must_check gh_vm_get(struct gh_vm *ghvm);
+void gh_vm_put(struct gh_vm *ghvm);
+
+struct gh_vm_function_instance;
+struct gh_vm_function {
+	u32 type;
+	const char *name;
+	struct module *mod;
+	long (*bind)(struct gh_vm_function_instance *f);
+	void (*unbind)(struct gh_vm_function_instance *f);
+	struct mutex instances_lock;
+	struct list_head instances;
+};
+
+/**
+ * struct gh_vm_function_instance - Represents one function instance
+ * @arg_size: size of user argument
+ * @arg: user argument to describe the function instance; arg_size is 0
+ * @argp: pointer to user argument
+ * @ghvm: Pointer to VM instance
+ * @rm: Pointer to resource manager for the VM instance
+ * @fn: The ops for the function
+ * @data: Private data for function
+ * @vm_list: for gh_vm's functions list
+ * @fn_list: for gh_vm_function's instances list
+ */
+struct gh_vm_function_instance {
+	size_t arg_size;
+	union {
+		u64 arg;
+		void *argp;
+	};
+	struct gh_vm *ghvm;
+	struct gh_rm *rm;
+	struct gh_vm_function *fn;
+	void *data;
+	struct list_head vm_list;
+	struct list_head fn_list;
+};
+
+int gh_vm_function_register(struct gh_vm_function *f);
+void gh_vm_function_unregister(struct gh_vm_function *f);
+
+#define DECLARE_GUNYAH_VM_FUNCTION(_name, _type, _bind, _unbind)	\
+	static struct gh_vm_function _name = {		\
+		.type = _type,						\
+		.name = __stringify(_name),				\
+		.mod = THIS_MODULE,					\
+		.bind = _bind,						\
+		.unbind = _unbind,					\
+	};								\
+	MODULE_ALIAS("ghfunc:"__stringify(_type))
+
+#define module_gunyah_vm_function(__gf)					\
+	module_driver(__gf, gh_vm_function_register, gh_vm_function_unregister)
+
+#define DECLARE_GUNYAH_VM_FUNCTION_INIT(_name, _type, _bind, _unbind)	\
+	DECLARE_GUNYAH_VM_FUNCTION(_name, _type, _bind, _unbind);	\
+	module_gunyah_vm_function(_name)
+
+#endif
diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
index d899bba6a4c6..8df455a2a293 100644
--- a/include/uapi/linux/gunyah.h
+++ b/include/uapi/linux/gunyah.h
@@ -66,4 +66,21 @@  struct gh_vm_dtb_config {
 
 #define GH_VM_START		_IO(GH_IOCTL_TYPE, 0x3)
 
+#define GH_FN_MAX_ARG_SIZE		256
+
+/**
+ * struct gh_fn_desc - Arguments to create a VM function
+ * @type: Type of the function. See GH_FN_* macro for supported types
+ * @arg_size: Size of argument to pass to the function
+ * @arg: Value or pointer to argument given to the function
+ */
+struct gh_fn_desc {
+	__u32 type;
+	__u32 arg_size;
+	__u64 arg;
+};
+
+#define GH_VM_ADD_FUNCTION	_IOW(GH_IOCTL_TYPE, 0x4, struct gh_fn_desc)
+#define GH_VM_REMOVE_FUNCTION	_IOW(GH_IOCTL_TYPE, 0x7, struct gh_fn_desc)
+
 #endif