[RFC,06/28] arm64: RME: ioctls to create and configure realms

Message ID 20230127112932.38045-7-steven.price@arm.com
State New
Series arm64: Support for Arm CCA in KVM

Commit Message

Steven Price Jan. 27, 2023, 11:29 a.m. UTC
Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. This involves
delegating pages to the RMM to hold the Realm Descriptor (RD) and the
base level of the Realm Translation Tables (RTT). A VMID also needs to
be picked; since the RMM has a separate VMID address space, a dedicated
allocator is added for this purpose.

KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm
before it is created.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/arm64/include/asm/kvm_rme.h |  14 ++
 arch/arm64/kvm/arm.c             |  19 ++
 arch/arm64/kvm/mmu.c             |   6 +
 arch/arm64/kvm/reset.c           |  33 +++
 arch/arm64/kvm/rme.c             | 357 +++++++++++++++++++++++++++++++
 5 files changed, 429 insertions(+)
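
For context, a minimal sketch of how a VMM might drive this two-step
flow from userspace, assuming the uapi constants introduced by this
series and a VM created as a realm so that kvm_is_realm() holds
(error handling omitted, untested):

	struct kvm_cap_arm_rme_config_item cfg = {
		.cfg = KVM_CAP_ARM_RME_CFG_HASH_ALGO,
		.hash_algo = KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256,
	};
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_ARM_RME,
		.args = { KVM_CAP_ARM_RME_CONFIG_REALM,
			  (__u64)(unsigned long)&cfg },
	};

	/* Configure first; can be repeated for other config items. */
	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);

	/* Then create the realm; must happen before any vCPU is created. */
	cap.args[0] = KVM_CAP_ARM_RME_CREATE_RD;
	cap.args[1] = 0;
	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);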
  

Comments

Jean-Philippe Brucker Feb. 7, 2023, 12:25 p.m. UTC | #1
On Fri, Jan 27, 2023 at 11:29:10AM +0000, Steven Price wrote:
> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +	struct kvm_cap_arm_rme_config_item cfg;
> +	struct realm *realm = &kvm->arch.realm;
> +	int r = 0;
> +
> +	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
> +		return -EBUSY;

This should also check kvm_is_realm() (otherwise we dereference a NULL
realm).
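
Something along these lines, placed before the state check, would avoid
it (sketch):

	if (!kvm_is_realm(kvm))
		return -EINVAL;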

I was wondering about fuzzing the API to find more of this kind of issue,
but don't know anything about it. Is there a recommended way to fuzz KVM?

Thanks,
Jean
  
Suzuki K Poulose Feb. 7, 2023, 12:55 p.m. UTC | #2
On 07/02/2023 12:25, Jean-Philippe Brucker wrote:
> On Fri, Jan 27, 2023 at 11:29:10AM +0000, Steven Price wrote:
>> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
>> +{
>> +	struct kvm_cap_arm_rme_config_item cfg;
>> +	struct realm *realm = &kvm->arch.realm;
>> +	int r = 0;
>> +
>> +	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
>> +		return -EBUSY;
> 
> This should also check kvm_is_realm() (otherwise we dereference a NULL
> realm).

Correct. I think this should be done further up the stack, in
kvm_vm_ioctl_enable_cap(), for KVM_CAP_ARM_RME.
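
Roughly, extending the existing case from this patch (sketch):

	case KVM_CAP_ARM_RME:
		if (!static_branch_unlikely(&kvm_rme_is_available))
			return -EINVAL;
		if (!kvm_is_realm(kvm))
			return -EINVAL;
		mutex_lock(&kvm->lock);
		r = kvm_realm_enable_cap(kvm, cap);
		mutex_unlock(&kvm->lock);
		break;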

> 
> I was wondering about fuzzing the API to find more of this kind of issue,
> but don't know anything about it. Is there a recommended way to fuzz KVM?

Not sure either. kselftests is one possible way to drive these tests, at
least for unit-testing the new ABIs. This is something we plan to add.
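
As a rough illustration (raw ioctl sketch rather than the kselftest
harness, untested), a unit test for the case Jean hit could create a
plain (non-realm) VM and check that the RME cap is rejected cleanly:

	int kvm_fd = open("/dev/kvm", O_RDWR);
	int vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);	/* not a realm */
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_ARM_RME,
		.args = { KVM_CAP_ARM_RME_CONFIG_REALM, 0 },
	};

	/* Must fail cleanly (e.g. -EINVAL), not oops the kernel. */
	assert(ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0);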

Thanks for catching this.

Suzuki



> Thanks,
> Jean
>
  
Zhi Wang Feb. 13, 2023, 4:10 p.m. UTC | #3
On Fri, 27 Jan 2023 11:29:10 +0000
Steven Price <steven.price@arm.com> wrote:

> Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. This involves
> delegating pages to the RMM to hold the Realm Descriptor (RD) and the
> base level of the Realm Translation Tables (RTT). A VMID also needs to
> be picked; since the RMM has a separate VMID address space, a dedicated
> allocator is added for this purpose.
> 
> KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm
> before it is created.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>  arch/arm64/include/asm/kvm_rme.h |  14 ++
>  arch/arm64/kvm/arm.c             |  19 ++
>  arch/arm64/kvm/mmu.c             |   6 +
>  arch/arm64/kvm/reset.c           |  33 +++
>  arch/arm64/kvm/rme.c             | 357 +++++++++++++++++++++++++++++++
>  5 files changed, 429 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
> index c26bc2c6770d..055a22accc08 100644
> --- a/arch/arm64/include/asm/kvm_rme.h
> +++ b/arch/arm64/include/asm/kvm_rme.h
> @@ -6,6 +6,8 @@
>  #ifndef __ASM_KVM_RME_H
>  #define __ASM_KVM_RME_H
>  
> +#include <uapi/linux/kvm.h>
> +
>  enum realm_state {
>  	REALM_STATE_NONE,
>  	REALM_STATE_NEW,
> @@ -15,8 +17,20 @@ enum realm_state {
>  
>  struct realm {
>  	enum realm_state state;
> +
> +	void *rd;
> +	struct realm_params *params;
> +
> +	unsigned long num_aux;
> +	unsigned int vmid;
> +	unsigned int ia_bits;
>  };
>  

Maybe more comments for this structure?
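
For example (a sketch only - the field meanings here are inferred from
how the rest of the series uses them):

	struct realm {
		enum realm_state state;	/* NONE -> NEW -> ... lifecycle */

		void *rd;		/* Realm Descriptor granule, delegated to the RMM */
		struct realm_params *params; /* staged parameters, freed once created */

		unsigned long num_aux;	/* aux granules needed per REC */
		unsigned int vmid;	/* VMID from the RMM's separate address space */
		unsigned int ia_bits;	/* IPA width of the realm's stage 2 */
	};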

>  int kvm_init_rme(void);
> +u32 kvm_realm_ipa_limit(void);
> +
> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
> +int kvm_init_realm_vm(struct kvm *kvm);
> +void kvm_destroy_realm(struct kvm *kvm);
>  
>  #endif
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index d97b39d042ab..50f54a63732a 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -103,6 +103,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>  		r = 0;
>  		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
>  		break;
> +	case KVM_CAP_ARM_RME:
> +		if (!static_branch_unlikely(&kvm_rme_is_available))
> +			return -EINVAL;
> +		mutex_lock(&kvm->lock);
> +		r = kvm_realm_enable_cap(kvm, cap);
> +		mutex_unlock(&kvm->lock);
> +		break;
>  	default:
>  		r = -EINVAL;
>  		break;
> @@ -172,6 +179,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	 */
>  	kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
>  
> +	/* Initialise the realm bits after the generic bits are enabled */
> +	if (kvm_is_realm(kvm)) {
> +		ret = kvm_init_realm_vm(kvm);
> +		if (ret)
> +			goto err_free_cpumask;
> +	}
> +
>  	return 0;
>  
>  err_free_cpumask:
> @@ -204,6 +218,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  	kvm_destroy_vcpus(kvm);
>  
>  	kvm_unshare_hyp(kvm, kvm + 1);
> +
> +	kvm_destroy_realm(kvm);
>  }
>  
>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> @@ -300,6 +316,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_ARM_PTRAUTH_GENERIC:
>  		r = system_has_full_ptr_auth();
>  		break;
> +	case KVM_CAP_ARM_RME:
> +		r = static_key_enabled(&kvm_rme_is_available);
> +		break;
>  	default:
>  		r = 0;
>  	}
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 31d7fa4c7c14..d0f707767d05 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -840,6 +840,12 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>  	struct kvm_pgtable *pgt = NULL;
>  
>  	write_lock(&kvm->mmu_lock);
> +	if (kvm_is_realm(kvm) &&
> +	    kvm_realm_state(kvm) != REALM_STATE_DYING) {
> +		/* TODO: teardown rtts */
> +		write_unlock(&kvm->mmu_lock);
> +		return;
> +	}
>  	pgt = mmu->pgt;
>  	if (pgt) {
>  		mmu->pgd_phys = 0;
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e0267f672b8a..c165df174737 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -395,3 +395,36 @@ int kvm_set_ipa_limit(void)
>  
>  	return 0;
>  }
> +
> +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
> +{
> +	u64 mmfr0, mmfr1;
> +	u32 phys_shift;
> +	u32 ipa_limit = kvm_ipa_limit;
> +
> +	if (kvm_is_realm(kvm))
> +		ipa_limit = kvm_realm_ipa_limit();
> +
> +	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
> +		return -EINVAL;
> +
> +	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
> +	if (phys_shift) {
> +		if (phys_shift > ipa_limit ||
> +		    phys_shift < ARM64_MIN_PARANGE_BITS)
> +			return -EINVAL;
> +	} else {
> +		phys_shift = KVM_PHYS_SHIFT;
> +		if (phys_shift > ipa_limit) {
> +			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
> +				     current->comm);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> +	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
> +	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
> +
> +	return 0;
> +}
> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
> index f6b587bc116e..9f8c5a91b8fc 100644
> --- a/arch/arm64/kvm/rme.c
> +++ b/arch/arm64/kvm/rme.c
> @@ -5,9 +5,49 @@
>  
>  #include <linux/kvm_host.h>
>  
> +#include <asm/kvm_emulate.h>
> +#include <asm/kvm_mmu.h>
>  #include <asm/rmi_cmds.h>
>  #include <asm/virt.h>
>  
> +/************ FIXME: Copied from kvm/hyp/pgtable.c **********/
> +#include <asm/kvm_pgtable.h>
> +
> +struct kvm_pgtable_walk_data {
> +	struct kvm_pgtable		*pgt;
> +	struct kvm_pgtable_walker	*walker;
> +
> +	u64				addr;
> +	u64				end;
> +};
> +
> +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
> +{
> +	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
> +	u64 mask = BIT(pgt->ia_bits) - 1;
> +
> +	return (addr & mask) >> shift;
> +}
> +
> +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
> +{
> +	struct kvm_pgtable pgt = {
> +		.ia_bits	= ia_bits,
> +		.start_level	= start_level,
> +	};
> +
> +	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
> +}
> +
> +/******************/
> +
> +static unsigned long rmm_feat_reg0;
> +
> +static bool rme_supports(unsigned long feature)
> +{
> +	return !!u64_get_bits(rmm_feat_reg0, feature);
> +}
> +
>  static int rmi_check_version(void)
>  {
>  	struct arm_smccc_res res;
> @@ -33,8 +73,319 @@ static int rmi_check_version(void)
>  	return 0;
>  }
>  
> +static unsigned long create_realm_feat_reg0(struct kvm *kvm)
> +{
> +	unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
> +	u64 feat_reg0 = 0;
> +
> +	int num_bps = u64_get_bits(rmm_feat_reg0,
> +				   RMI_FEATURE_REGISTER_0_NUM_BPS);
> +	int num_wps = u64_get_bits(rmm_feat_reg0,
> +				   RMI_FEATURE_REGISTER_0_NUM_WPS);
> +
> +	feat_reg0 |= u64_encode_bits(ia_bits, RMI_FEATURE_REGISTER_0_S2SZ);
> +	feat_reg0 |= u64_encode_bits(num_bps, RMI_FEATURE_REGISTER_0_NUM_BPS);
> +	feat_reg0 |= u64_encode_bits(num_wps, RMI_FEATURE_REGISTER_0_NUM_WPS);
> +
> +	return feat_reg0;
> +}
> +
> +u32 kvm_realm_ipa_limit(void)
> +{
> +	return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
> +}
> +
> +static u32 get_start_level(struct kvm *kvm)
> +{
> +	long sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, kvm->arch.vtcr);
> +
> +	return VTCR_EL2_TGRAN_SL0_BASE - sl0;
> +}
> +
> +static int realm_create_rd(struct kvm *kvm)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +	struct realm_params *params = realm->params;
> +	void *rd = NULL;
> +	phys_addr_t rd_phys, params_phys;
> +	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> +	unsigned int pgd_sz;
> +	int i, r;
> +
> +	if (WARN_ON(realm->rd) || WARN_ON(!realm->params))
> +		return -EEXIST;
> +
> +	rd = (void *)__get_free_page(GFP_KERNEL);
> +	if (!rd)
> +		return -ENOMEM;
> +
> +	rd_phys = virt_to_phys(rd);
> +	if (rmi_granule_delegate(rd_phys)) {
> +		r = -ENXIO;
> +		goto out;
> +	}
> +
> +	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
> +	for (i = 0; i < pgd_sz; i++) {
> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +		if (rmi_granule_delegate(pgd_phys)) {
> +			r = -ENXIO;
> +			goto out_undelegate_tables;
> +		}
> +	}
> +
> +	params->rtt_level_start = get_start_level(kvm);
> +	params->rtt_num_start = pgd_sz;
> +	params->rtt_base = kvm->arch.mmu.pgd_phys;
> +	params->vmid = realm->vmid;
> +
> +	params_phys = virt_to_phys(params);
> +
> +	if (rmi_realm_create(rd_phys, params_phys)) {
> +		r = -ENXIO;
> +		goto out_undelegate_tables;
> +	}
> +
> +	realm->rd = rd;
> +	realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
> +
> +	if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) {
> +		WARN_ON(rmi_realm_destroy(rd_phys));
> +		goto out_undelegate_tables;
> +	}
> +
> +	return 0;
> +
> +out_undelegate_tables:
> +	while (--i >= 0) {
> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +		WARN_ON(rmi_granule_undelegate(pgd_phys));
> +	}
> +	WARN_ON(rmi_granule_undelegate(rd_phys));
> +out:
> +	free_page((unsigned long)rd);
> +	return r;
> +}
> +

Just curious. Wouldn't it be better to use IDR as this is ID allocation? There
were some efforts to change the use of bitmap allocation to IDR before.

> +/* Protects access to rme_vmid_bitmap */
> +static DEFINE_SPINLOCK(rme_vmid_lock);
> +static unsigned long *rme_vmid_bitmap;
> +
> +static int rme_vmid_init(void)
> +{
> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
> +
> +	rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
> +	if (!rme_vmid_bitmap) {
> +		kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
> +		return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
> +static int rme_vmid_reserve(void)
> +{
> +	int ret;
> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
> +
> +	spin_lock(&rme_vmid_lock);
> +	ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
> +	spin_unlock(&rme_vmid_lock);
> +
> +	return ret;
> +}
> +
> +static void rme_vmid_release(unsigned int vmid)
> +{
> +	spin_lock(&rme_vmid_lock);
> +	bitmap_release_region(rme_vmid_bitmap, vmid, 0);
> +	spin_unlock(&rme_vmid_lock);
> +}
> +
> +static int kvm_create_realm(struct kvm *kvm)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +	int ret;
> +
> +	if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
> +		return -EEXIST;
> +
> +	ret = rme_vmid_reserve();
> +	if (ret < 0)
> +		return ret;
> +	realm->vmid = ret;
> +
> +	ret = realm_create_rd(kvm);
> +	if (ret) {
> +		rme_vmid_release(realm->vmid);
> +		return ret;
> +	}
> +
> +	WRITE_ONCE(realm->state, REALM_STATE_NEW);
> +
> +	/* The realm is up, free the parameters.  */
> +	free_page((unsigned long)realm->params);
> +	realm->params = NULL;
> +
> +	return 0;
> +}
> +
> +static int config_realm_hash_algo(struct realm *realm,
> +				  struct kvm_cap_arm_rme_config_item *cfg)
> +{
> +	switch (cfg->hash_algo) {
> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
> +			return -EINVAL;
> +		break;
> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
> +			return -EINVAL;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +	realm->params->measurement_algo = cfg->hash_algo;
> +	return 0;
> +}
> +
> +static int config_realm_sve(struct realm *realm,
> +			    struct kvm_cap_arm_rme_config_item *cfg)
> +{
> +	u64 features_0 = realm->params->features_0;
> +	int max_sve_vq = u64_get_bits(rmm_feat_reg0,
> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
> +
> +	if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
> +		return -EINVAL;
> +
> +	if (cfg->sve_vq > max_sve_vq)
> +		return -EINVAL;
> +
> +	features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
> +			RMI_FEATURE_REGISTER_0_SVE_VL);
> +	features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
> +	features_0 |= u64_encode_bits(cfg->sve_vq,
> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
> +
> +	realm->params->features_0 = features_0;
> +	return 0;
> +}
> +
> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +	struct kvm_cap_arm_rme_config_item cfg;
> +	struct realm *realm = &kvm->arch.realm;
> +	int r = 0;
> +
> +	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
> +		return -EBUSY;
> +
> +	if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
> +		return -EFAULT;
> +
> +	switch (cfg.cfg) {
> +	case KVM_CAP_ARM_RME_CFG_RPV:
> +		memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
> +		break;
> +	case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
> +		r = config_realm_hash_algo(realm, &cfg);
> +		break;
> +	case KVM_CAP_ARM_RME_CFG_SVE:
> +		r = config_realm_sve(realm, &cfg);
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +
> +	return r;
> +}
> +
> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +	int r = 0;
> +
> +	switch (cap->args[0]) {
> +	case KVM_CAP_ARM_RME_CONFIG_REALM:
> +		r = kvm_rme_config_realm(kvm, cap);
> +		break;
> +	case KVM_CAP_ARM_RME_CREATE_RD:
> +		if (kvm->created_vcpus) {
> +			r = -EBUSY;
> +			break;
> +		}
> +
> +		r = kvm_create_realm(kvm);
> +		break;
> +	default:
> +		r = -EINVAL;
> +		break;
> +	}
> +
> +	return r;
> +}
> +
> +void kvm_destroy_realm(struct kvm *kvm)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> +	unsigned int pgd_sz;
> +	int i;
> +
> +	if (realm->params) {
> +		free_page((unsigned long)realm->params);
> +		realm->params = NULL;
> +	}
> +
> +	if (kvm_realm_state(kvm) == REALM_STATE_NONE)
> +		return;
> +
> +	WRITE_ONCE(realm->state, REALM_STATE_DYING);
> +
> +	rme_vmid_release(realm->vmid);
> +
> +	if (realm->rd) {
> +		phys_addr_t rd_phys = virt_to_phys(realm->rd);
> +
> +		if (WARN_ON(rmi_realm_destroy(rd_phys)))
> +			return;
> +		if (WARN_ON(rmi_granule_undelegate(rd_phys)))
> +			return;
> +		free_page((unsigned long)realm->rd);
> +		realm->rd = NULL;
> +	}
> +
> +	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
> +	for (i = 0; i < pgd_sz; i++) {
> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +		if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
> +			return;
> +	}
> +
> +	kvm_free_stage2_pgd(&kvm->arch.mmu);
> +}
> +
> +int kvm_init_realm_vm(struct kvm *kvm)
> +{
> +	struct realm_params *params;
> +
> +	params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
> +	if (!params)
> +		return -ENOMEM;
> +
> +	params->features_0 = create_realm_feat_reg0(kvm);
> +	kvm->arch.realm.params = params;
> +	return 0;
> +}
> +
>  int kvm_init_rme(void)
>  {
> +	int ret;
> +
>  	if (PAGE_SIZE != SZ_4K)
>  		/* Only 4k page size on the host is supported */
>  		return 0;
> @@ -43,6 +394,12 @@ int kvm_init_rme(void)
>  		/* Continue without realm support */
>  		return 0;
>  
> +	ret = rme_vmid_init();
> +	if (ret)
> +		return ret;
> +
> +	WARN_ON(rmi_features(0, &rmm_feat_reg0));
> +
>  	/* Future patch will enable static branch kvm_rme_is_available */
>  
>  	return 0;
  
Steven Price March 1, 2023, 11:55 a.m. UTC | #4
On 13/02/2023 16:10, Zhi Wang wrote:
> On Fri, 27 Jan 2023 11:29:10 +0000
> Steven Price <steven.price@arm.com> wrote:
> 
>> Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. This involves
>> delegating pages to the RMM to hold the Realm Descriptor (RD) and the
>> base level of the Realm Translation Tables (RTT). A VMID also needs to
>> be picked; since the RMM has a separate VMID address space, a dedicated
>> allocator is added for this purpose.
>>
>> KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm
>> before it is created.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>>  arch/arm64/include/asm/kvm_rme.h |  14 ++
>>  arch/arm64/kvm/arm.c             |  19 ++
>>  arch/arm64/kvm/mmu.c             |   6 +
>>  arch/arm64/kvm/reset.c           |  33 +++
>>  arch/arm64/kvm/rme.c             | 357 +++++++++++++++++++++++++++++++
>>  5 files changed, 429 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
>> index c26bc2c6770d..055a22accc08 100644
>> --- a/arch/arm64/include/asm/kvm_rme.h
>> +++ b/arch/arm64/include/asm/kvm_rme.h
>> @@ -6,6 +6,8 @@
>>  #ifndef __ASM_KVM_RME_H
>>  #define __ASM_KVM_RME_H
>>  
>> +#include <uapi/linux/kvm.h>
>> +
>>  enum realm_state {
>>  	REALM_STATE_NONE,
>>  	REALM_STATE_NEW,
>> @@ -15,8 +17,20 @@ enum realm_state {
>>  
>>  struct realm {
>>  	enum realm_state state;
>> +
>> +	void *rd;
>> +	struct realm_params *params;
>> +
>> +	unsigned long num_aux;
>> +	unsigned int vmid;
>> +	unsigned int ia_bits;
>>  };
>>  
> 
> Maybe more comments for this structure?

Agreed, this series is a bit light on comments. I'll try to improve for v2.

<snip>

> 
> Just curious. Wouldn't it be better to use IDR as this is ID allocation? There
> were some efforts to change the use of bitmap allocation to IDR before.

I'm not sure it makes much difference really. This matches KVM's
vmid_map, but here things are much simpler as there's no support for
the likes of VMID rollover (the number of Realm VMs is just capped at
the number of VMIDs).

IDR provides a lot of functionality we don't need, but equally I don't
think performance or memory usage is really a concern here.
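
For comparison, an IDA-based version would look something like this
(sketch, untested; the IDA has its own internal locking, so the
spinlock would go away too):

	static DEFINE_IDA(rme_vmid_ida);

	static int rme_vmid_reserve(void)
	{
		return ida_alloc_max(&rme_vmid_ida,
				     (1 << kvm_get_vmid_bits()) - 1,
				     GFP_KERNEL);
	}

	static void rme_vmid_release(unsigned int vmid)
	{
		ida_free(&rme_vmid_ida, vmid);
	}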

Steve

>> +/* Protects access to rme_vmid_bitmap */
>> +static DEFINE_SPINLOCK(rme_vmid_lock);
>> +static unsigned long *rme_vmid_bitmap;
>> +
>> +static int rme_vmid_init(void)
>> +{
>> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
>> +
>> +	rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
>> +	if (!rme_vmid_bitmap) {
>> +		kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int rme_vmid_reserve(void)
>> +{
>> +	int ret;
>> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
>> +
>> +	spin_lock(&rme_vmid_lock);
>> +	ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
>> +	spin_unlock(&rme_vmid_lock);
>> +
>> +	return ret;
>> +}
>> +
>> +static void rme_vmid_release(unsigned int vmid)
>> +{
>> +	spin_lock(&rme_vmid_lock);
>> +	bitmap_release_region(rme_vmid_bitmap, vmid, 0);
>> +	spin_unlock(&rme_vmid_lock);
>> +}
>> +
>> +static int kvm_create_realm(struct kvm *kvm)
>> +{
>> +	struct realm *realm = &kvm->arch.realm;
>> +	int ret;
>> +
>> +	if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
>> +		return -EEXIST;
>> +
>> +	ret = rme_vmid_reserve();
>> +	if (ret < 0)
>> +		return ret;
>> +	realm->vmid = ret;
>> +
>> +	ret = realm_create_rd(kvm);
>> +	if (ret) {
>> +		rme_vmid_release(realm->vmid);
>> +		return ret;
>> +	}
>> +
>> +	WRITE_ONCE(realm->state, REALM_STATE_NEW);
>> +
>> +	/* The realm is up, free the parameters.  */
>> +	free_page((unsigned long)realm->params);
>> +	realm->params = NULL;
>> +
>> +	return 0;
>> +}
>> +
>> +static int config_realm_hash_algo(struct realm *realm,
>> +				  struct kvm_cap_arm_rme_config_item *cfg)
>> +{
>> +	switch (cfg->hash_algo) {
>> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
>> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
>> +			return -EINVAL;
>> +		break;
>> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
>> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
>> +			return -EINVAL;
>> +		break;
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +	realm->params->measurement_algo = cfg->hash_algo;
>> +	return 0;
>> +}
>> +
>> +static int config_realm_sve(struct realm *realm,
>> +			    struct kvm_cap_arm_rme_config_item *cfg)
>> +{
>> +	u64 features_0 = realm->params->features_0;
>> +	int max_sve_vq = u64_get_bits(rmm_feat_reg0,
>> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
>> +
>> +	if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
>> +		return -EINVAL;
>> +
>> +	if (cfg->sve_vq > max_sve_vq)
>> +		return -EINVAL;
>> +
>> +	features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
>> +			RMI_FEATURE_REGISTER_0_SVE_VL);
>> +	features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
>> +	features_0 |= u64_encode_bits(cfg->sve_vq,
>> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
>> +
>> +	realm->params->features_0 = features_0;
>> +	return 0;
>> +}
>> +
>> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
>> +{
>> +	struct kvm_cap_arm_rme_config_item cfg;
>> +	struct realm *realm = &kvm->arch.realm;
>> +	int r = 0;
>> +
>> +	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
>> +		return -EBUSY;
>> +
>> +	if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
>> +		return -EFAULT;
>> +
>> +	switch (cfg.cfg) {
>> +	case KVM_CAP_ARM_RME_CFG_RPV:
>> +		memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
>> +		break;
>> +	case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
>> +		r = config_realm_hash_algo(realm, &cfg);
>> +		break;
>> +	case KVM_CAP_ARM_RME_CFG_SVE:
>> +		r = config_realm_sve(realm, &cfg);
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +	}
>> +
>> +	return r;
>> +}
>> +
>> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>> +{
>> +	int r = 0;
>> +
>> +	switch (cap->args[0]) {
>> +	case KVM_CAP_ARM_RME_CONFIG_REALM:
>> +		r = kvm_rme_config_realm(kvm, cap);
>> +		break;
>> +	case KVM_CAP_ARM_RME_CREATE_RD:
>> +		if (kvm->created_vcpus) {
>> +			r = -EBUSY;
>> +			break;
>> +		}
>> +
>> +		r = kvm_create_realm(kvm);
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +		break;
>> +	}
>> +
>> +	return r;
>> +}
>> +
>> +void kvm_destroy_realm(struct kvm *kvm)
>> +{
>> +	struct realm *realm = &kvm->arch.realm;
>> +	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
>> +	unsigned int pgd_sz;
>> +	int i;
>> +
>> +	if (realm->params) {
>> +		free_page((unsigned long)realm->params);
>> +		realm->params = NULL;
>> +	}
>> +
>> +	if (kvm_realm_state(kvm) == REALM_STATE_NONE)
>> +		return;
>> +
>> +	WRITE_ONCE(realm->state, REALM_STATE_DYING);
>> +
>> +	rme_vmid_release(realm->vmid);
>> +
>> +	if (realm->rd) {
>> +		phys_addr_t rd_phys = virt_to_phys(realm->rd);
>> +
>> +		if (WARN_ON(rmi_realm_destroy(rd_phys)))
>> +			return;
>> +		if (WARN_ON(rmi_granule_undelegate(rd_phys)))
>> +			return;
>> +		free_page((unsigned long)realm->rd);
>> +		realm->rd = NULL;
>> +	}
>> +
>> +	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
>> +	for (i = 0; i < pgd_sz; i++) {
>> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
>> +
>> +		if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
>> +			return;
>> +	}
>> +
>> +	kvm_free_stage2_pgd(&kvm->arch.mmu);
>> +}
>> +
>> +int kvm_init_realm_vm(struct kvm *kvm)
>> +{
>> +	struct realm_params *params;
>> +
>> +	params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
>> +	if (!params)
>> +		return -ENOMEM;
>> +
>> +	params->features_0 = create_realm_feat_reg0(kvm);
>> +	kvm->arch.realm.params = params;
>> +	return 0;
>> +}
>> +
>>  int kvm_init_rme(void)
>>  {
>> +	int ret;
>> +
>>  	if (PAGE_SIZE != SZ_4K)
>>  		/* Only 4k page size on the host is supported */
>>  		return 0;
>> @@ -43,6 +394,12 @@ int kvm_init_rme(void)
>>  		/* Continue without realm support */
>>  		return 0;
>>  
>> +	ret = rme_vmid_init();
>> +	if (ret)
>> +		return ret;
>> +
>> +	WARN_ON(rmi_features(0, &rmm_feat_reg0));
>> +
>>  	/* Future patch will enable static branch kvm_rme_is_available */
>>  
>>  	return 0;
>
  
Zhi Wang March 1, 2023, 8:33 p.m. UTC | #5
On Wed, 1 Mar 2023 11:55:17 +0000
Steven Price <steven.price@arm.com> wrote:

> On 13/02/2023 16:10, Zhi Wang wrote:
> > On Fri, 27 Jan 2023 11:29:10 +0000
> > Steven Price <steven.price@arm.com> wrote:
> > 
> >> Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. This involves
> >> delegating pages to the RMM to hold the Realm Descriptor (RD) and the
> >> base level of the Realm Translation Tables (RTT). A VMID also needs to
> >> be picked; since the RMM has a separate VMID address space, a dedicated
> >> allocator is added for this purpose.
> >>
> >> KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm
> >> before it is created.
> >>
> >> Signed-off-by: Steven Price <steven.price@arm.com>
> >> ---
> >>  arch/arm64/include/asm/kvm_rme.h |  14 ++
> >>  arch/arm64/kvm/arm.c             |  19 ++
> >>  arch/arm64/kvm/mmu.c             |   6 +
> >>  arch/arm64/kvm/reset.c           |  33 +++
> >>  arch/arm64/kvm/rme.c             | 357 +++++++++++++++++++++++++++++++
> >>  5 files changed, 429 insertions(+)
> >>
> >> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
> >> index c26bc2c6770d..055a22accc08 100644
> >> --- a/arch/arm64/include/asm/kvm_rme.h
> >> +++ b/arch/arm64/include/asm/kvm_rme.h
> >> @@ -6,6 +6,8 @@
> >>  #ifndef __ASM_KVM_RME_H
> >>  #define __ASM_KVM_RME_H
> >>  
> >> +#include <uapi/linux/kvm.h>
> >> +
> >>  enum realm_state {
> >>  	REALM_STATE_NONE,
> >>  	REALM_STATE_NEW,
> >> @@ -15,8 +17,20 @@ enum realm_state {
> >>  
> >>  struct realm {
> >>  	enum realm_state state;
> >> +
> >> +	void *rd;
> >> +	struct realm_params *params;
> >> +
> >> +	unsigned long num_aux;
> >> +	unsigned int vmid;
> >> +	unsigned int ia_bits;
> >>  };
> >>  
> > 
> > Maybe more comments for this structure?
> 
> Agreed, this series is a bit light on comments. I'll try to improve for v2.
> 
> <snip>
> 
> > 
> > Just curious. Wouldn't it be better to use IDR as this is ID allocation? There
> > were some efforts to change the use of bitmap allocation to IDR before.
> 
> I'm not sure it makes much difference really. This matches KVM's
> vmid_map, but here things are much simpler as there's no support for
> the likes of VMID rollover (the number of Realm VMs is just capped at
> the number of VMIDs).
> 
> IDR provides a lot of functionality we don't need, but equally I don't
> think performance or memory usage is really a concern here.

Agreed. I am not opposed to the current approach. I gave this comment because
I vaguely remember there were some patch sets converting bitmap allocation to
IDR in the kernel before, so I think it is better to raise it now and reach a
conclusion. It would save some effort for the people who might jump into the
review later.

> 
> Steve
> 
> >> +/* Protects access to rme_vmid_bitmap */
> >> +static DEFINE_SPINLOCK(rme_vmid_lock);
> >> +static unsigned long *rme_vmid_bitmap;
> >> +
> >> +static int rme_vmid_init(void)
> >> +{
> >> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
> >> +
> >> +	rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
> >> +	if (!rme_vmid_bitmap) {
> >> +		kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
> >> +		return -ENOMEM;
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int rme_vmid_reserve(void)
> >> +{
> >> +	int ret;
> >> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
> >> +
> >> +	spin_lock(&rme_vmid_lock);
> >> +	ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
> >> +	spin_unlock(&rme_vmid_lock);
> >> +
> >> +	return ret;
> >> +}
> >> +
> >> +static void rme_vmid_release(unsigned int vmid)
> >> +{
> >> +	spin_lock(&rme_vmid_lock);
> >> +	bitmap_release_region(rme_vmid_bitmap, vmid, 0);
> >> +	spin_unlock(&rme_vmid_lock);
> >> +}
> >> +
> >> +static int kvm_create_realm(struct kvm *kvm)
> >> +{
> >> +	struct realm *realm = &kvm->arch.realm;
> >> +	int ret;
> >> +
> >> +	if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
> >> +		return -EEXIST;
> >> +
> >> +	ret = rme_vmid_reserve();
> >> +	if (ret < 0)
> >> +		return ret;
> >> +	realm->vmid = ret;
> >> +
> >> +	ret = realm_create_rd(kvm);
> >> +	if (ret) {
> >> +		rme_vmid_release(realm->vmid);
> >> +		return ret;
> >> +	}
> >> +
> >> +	WRITE_ONCE(realm->state, REALM_STATE_NEW);
> >> +
> >> +	/* The realm is up, free the parameters.  */
> >> +	free_page((unsigned long)realm->params);
> >> +	realm->params = NULL;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int config_realm_hash_algo(struct realm *realm,
> >> +				  struct kvm_cap_arm_rme_config_item *cfg)
> >> +{
> >> +	switch (cfg->hash_algo) {
> >> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
> >> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
> >> +			return -EINVAL;
> >> +		break;
> >> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
> >> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
> >> +			return -EINVAL;
> >> +		break;
> >> +	default:
> >> +		return -EINVAL;
> >> +	}
> >> +	realm->params->measurement_algo = cfg->hash_algo;
> >> +	return 0;
> >> +}
> >> +
> >> +static int config_realm_sve(struct realm *realm,
> >> +			    struct kvm_cap_arm_rme_config_item *cfg)
> >> +{
> >> +	u64 features_0 = realm->params->features_0;
> >> +	int max_sve_vq = u64_get_bits(rmm_feat_reg0,
> >> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
> >> +
> >> +	if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
> >> +		return -EINVAL;
> >> +
> >> +	if (cfg->sve_vq > max_sve_vq)
> >> +		return -EINVAL;
> >> +
> >> +	features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
> >> +			RMI_FEATURE_REGISTER_0_SVE_VL);
> >> +	features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
> >> +	features_0 |= u64_encode_bits(cfg->sve_vq,
> >> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
> >> +
> >> +	realm->params->features_0 = features_0;
> >> +	return 0;
> >> +}
> >> +
> >> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
> >> +{
> >> +	struct kvm_cap_arm_rme_config_item cfg;
> >> +	struct realm *realm = &kvm->arch.realm;
> >> +	int r = 0;
> >> +
> >> +	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
> >> +		return -EBUSY;
> >> +
> >> +	if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
> >> +		return -EFAULT;
> >> +
> >> +	switch (cfg.cfg) {
> >> +	case KVM_CAP_ARM_RME_CFG_RPV:
> >> +		memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
> >> +		break;
> >> +	case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
> >> +		r = config_realm_hash_algo(realm, &cfg);
> >> +		break;
> >> +	case KVM_CAP_ARM_RME_CFG_SVE:
> >> +		r = config_realm_sve(realm, &cfg);
> >> +		break;
> >> +	default:
> >> +		r = -EINVAL;
> >> +	}
> >> +
> >> +	return r;
> >> +}
> >> +
> >> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> >> +{
> >> +	int r = 0;
> >> +
> >> +	switch (cap->args[0]) {
> >> +	case KVM_CAP_ARM_RME_CONFIG_REALM:
> >> +		r = kvm_rme_config_realm(kvm, cap);
> >> +		break;
> >> +	case KVM_CAP_ARM_RME_CREATE_RD:
> >> +		if (kvm->created_vcpus) {
> >> +			r = -EBUSY;
> >> +			break;
> >> +		}
> >> +
> >> +		r = kvm_create_realm(kvm);
> >> +		break;
> >> +	default:
> >> +		r = -EINVAL;
> >> +		break;
> >> +	}
> >> +
> >> +	return r;
> >> +}
> >> +
> >> +void kvm_destroy_realm(struct kvm *kvm)
> >> +{
> >> +	struct realm *realm = &kvm->arch.realm;
> >> +	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> >> +	unsigned int pgd_sz;
> >> +	int i;
> >> +
> >> +	if (realm->params) {
> >> +		free_page((unsigned long)realm->params);
> >> +		realm->params = NULL;
> >> +	}
> >> +
> >> +	if (kvm_realm_state(kvm) == REALM_STATE_NONE)
> >> +		return;
> >> +
> >> +	WRITE_ONCE(realm->state, REALM_STATE_DYING);
> >> +
> >> +	rme_vmid_release(realm->vmid);
> >> +
> >> +	if (realm->rd) {
> >> +		phys_addr_t rd_phys = virt_to_phys(realm->rd);
> >> +
> >> +		if (WARN_ON(rmi_realm_destroy(rd_phys)))
> >> +			return;
> >> +		if (WARN_ON(rmi_granule_undelegate(rd_phys)))
> >> +			return;
> >> +		free_page((unsigned long)realm->rd);
> >> +		realm->rd = NULL;
> >> +	}
> >> +
> >> +	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
> >> +	for (i = 0; i < pgd_sz; i++) {
> >> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> >> +
> >> +		if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
> >> +			return;
> >> +	}
> >> +
> >> +	kvm_free_stage2_pgd(&kvm->arch.mmu);
> >> +}
> >> +
> >> +int kvm_init_realm_vm(struct kvm *kvm)
> >> +{
> >> +	struct realm_params *params;
> >> +
> >> +	params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
> >> +	if (!params)
> >> +		return -ENOMEM;
> >> +
> >> +	params->features_0 = create_realm_feat_reg0(kvm);
> >> +	kvm->arch.realm.params = params;
> >> +	return 0;
> >> +}
> >> +
> >>  int kvm_init_rme(void)
> >>  {
> >> +	int ret;
> >> +
> >>  	if (PAGE_SIZE != SZ_4K)
> >>  		/* Only 4k page size on the host is supported */
> >>  		return 0;
> >> @@ -43,6 +394,12 @@ int kvm_init_rme(void)
> >>  		/* Continue without realm support */
> >>  		return 0;
> >>  
> >> +	ret = rme_vmid_init();
> >> +	if (ret)
> >> +		return ret;
> >> +
> >> +	WARN_ON(rmi_features(0, &rmm_feat_reg0));
> >> +
> >>  	/* Future patch will enable static branch kvm_rme_is_available */
> >>  
> >>  	return 0;
> > 
>
  
Zhi Wang March 6, 2023, 7:10 p.m. UTC | #6
On Fri, 27 Jan 2023 11:29:10 +0000
Steven Price <steven.price@arm.com> wrote:

> Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. This involves
> delegating pages to the RMM to hold the Realm Descriptor (RD) and the
> base level of the Realm Translation Tables (RTT). A VMID also needs to
> be picked; since the RMM has a separate VMID address space, a dedicated
> allocator is added for this purpose.
> 
> KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm
> before it is created.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>  arch/arm64/include/asm/kvm_rme.h |  14 ++
>  arch/arm64/kvm/arm.c             |  19 ++
>  arch/arm64/kvm/mmu.c             |   6 +
>  arch/arm64/kvm/reset.c           |  33 +++
>  arch/arm64/kvm/rme.c             | 357 +++++++++++++++++++++++++++++++
>  5 files changed, 429 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
> index c26bc2c6770d..055a22accc08 100644
> --- a/arch/arm64/include/asm/kvm_rme.h
> +++ b/arch/arm64/include/asm/kvm_rme.h
> @@ -6,6 +6,8 @@
>  #ifndef __ASM_KVM_RME_H
>  #define __ASM_KVM_RME_H
>  
> +#include <uapi/linux/kvm.h>
> +
>  enum realm_state {
>  	REALM_STATE_NONE,
>  	REALM_STATE_NEW,
> @@ -15,8 +17,20 @@ enum realm_state {
>  
>  struct realm {
>  	enum realm_state state;
> +
> +	void *rd;
> +	struct realm_params *params;
> +
> +	unsigned long num_aux;
> +	unsigned int vmid;
> +	unsigned int ia_bits;
>  };
>  
>  int kvm_init_rme(void);
> +u32 kvm_realm_ipa_limit(void);
> +
> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
> +int kvm_init_realm_vm(struct kvm *kvm);
> +void kvm_destroy_realm(struct kvm *kvm);
>  
>  #endif
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index d97b39d042ab..50f54a63732a 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -103,6 +103,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>  		r = 0;
>  		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
>  		break;
> +	case KVM_CAP_ARM_RME:
> +		if (!static_branch_unlikely(&kvm_rme_is_available))
> +			return -EINVAL;
> +		mutex_lock(&kvm->lock);
> +		r = kvm_realm_enable_cap(kvm, cap);
> +		mutex_unlock(&kvm->lock);
> +		break;
>  	default:
>  		r = -EINVAL;
>  		break;
> @@ -172,6 +179,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>  	 */
>  	kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
>  
> +	/* Initialise the realm bits after the generic bits are enabled */
> +	if (kvm_is_realm(kvm)) {
> +		ret = kvm_init_realm_vm(kvm);
> +		if (ret)
> +			goto err_free_cpumask;
> +	}
> +
>  	return 0;
>  
>  err_free_cpumask:
> @@ -204,6 +218,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>  	kvm_destroy_vcpus(kvm);
>  
>  	kvm_unshare_hyp(kvm, kvm + 1);
> +
> +	kvm_destroy_realm(kvm);
>  }
>  
>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> @@ -300,6 +316,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_ARM_PTRAUTH_GENERIC:
>  		r = system_has_full_ptr_auth();
>  		break;
> +	case KVM_CAP_ARM_RME:
> +		r = static_key_enabled(&kvm_rme_is_available);
> +		break;
>  	default:
>  		r = 0;
>  	}
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 31d7fa4c7c14..d0f707767d05 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -840,6 +840,12 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>  	struct kvm_pgtable *pgt = NULL;
>  
>  	write_lock(&kvm->mmu_lock);
> +	if (kvm_is_realm(kvm) &&
> +	    kvm_realm_state(kvm) != REALM_STATE_DYING) {
> +		/* TODO: teardown rtts */
> +		write_unlock(&kvm->mmu_lock);
> +		return;
> +	}
>  	pgt = mmu->pgt;
>  	if (pgt) {
>  		mmu->pgd_phys = 0;
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index e0267f672b8a..c165df174737 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -395,3 +395,36 @@ int kvm_set_ipa_limit(void)
>  
>  	return 0;
>  }
> +

The function below doesn't have a user in this patch. Also, it looks
like it was partly copied from kvm_init_stage2_mmu() in
arch/arm64/kvm/mmu.c.

> +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
> +{
> +	u64 mmfr0, mmfr1;
> +	u32 phys_shift;
> +	u32 ipa_limit = kvm_ipa_limit;
> +
> +	if (kvm_is_realm(kvm))
> +		ipa_limit = kvm_realm_ipa_limit();
> +
> +	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
> +		return -EINVAL;
> +
> +	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
> +	if (phys_shift) {
> +		if (phys_shift > ipa_limit ||
> +		    phys_shift < ARM64_MIN_PARANGE_BITS)
> +			return -EINVAL;
> +	} else {
> +		phys_shift = KVM_PHYS_SHIFT;
> +		if (phys_shift > ipa_limit) {
> +			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
> +				     current->comm);
> +			return -EINVAL;
> +		}
> +	}
> +
> +	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
> +	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
> +	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
> +
> +	return 0;
> +}
> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
> index f6b587bc116e..9f8c5a91b8fc 100644
> --- a/arch/arm64/kvm/rme.c
> +++ b/arch/arm64/kvm/rme.c
> @@ -5,9 +5,49 @@
>  
>  #include <linux/kvm_host.h>
>  
> +#include <asm/kvm_emulate.h>
> +#include <asm/kvm_mmu.h>
>  #include <asm/rmi_cmds.h>
>  #include <asm/virt.h>
>  
> +/************ FIXME: Copied from kvm/hyp/pgtable.c **********/
> +#include <asm/kvm_pgtable.h>
> +
> +struct kvm_pgtable_walk_data {
> +	struct kvm_pgtable		*pgt;
> +	struct kvm_pgtable_walker	*walker;
> +
> +	u64				addr;
> +	u64				end;
> +};
> +
> +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
> +{
> +	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
> +	u64 mask = BIT(pgt->ia_bits) - 1;
> +
> +	return (addr & mask) >> shift;
> +}
> +
> +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
> +{
> +	struct kvm_pgtable pgt = {
> +		.ia_bits	= ia_bits,
> +		.start_level	= start_level,
> +	};
> +
> +	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
> +}
> +
> +/******************/
> +
> +static unsigned long rmm_feat_reg0;
> +
> +static bool rme_supports(unsigned long feature)
> +{
> +	return !!u64_get_bits(rmm_feat_reg0, feature);
> +}
> +
>  static int rmi_check_version(void)
>  {
>  	struct arm_smccc_res res;
> @@ -33,8 +73,319 @@ static int rmi_check_version(void)
>  	return 0;
>  }
>  
> +static unsigned long create_realm_feat_reg0(struct kvm *kvm)
> +{
> +	unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
> +	u64 feat_reg0 = 0;
> +
> +	int num_bps = u64_get_bits(rmm_feat_reg0,
> +				   RMI_FEATURE_REGISTER_0_NUM_BPS);
> +	int num_wps = u64_get_bits(rmm_feat_reg0,
> +				   RMI_FEATURE_REGISTER_0_NUM_WPS);
> +
> +	feat_reg0 |= u64_encode_bits(ia_bits, RMI_FEATURE_REGISTER_0_S2SZ);
> +	feat_reg0 |= u64_encode_bits(num_bps, RMI_FEATURE_REGISTER_0_NUM_BPS);
> +	feat_reg0 |= u64_encode_bits(num_wps, RMI_FEATURE_REGISTER_0_NUM_WPS);
> +
> +	return feat_reg0;
> +}
> +
> +u32 kvm_realm_ipa_limit(void)
> +{
> +	return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
> +}
> +
> +static u32 get_start_level(struct kvm *kvm)
> +{
> +	long sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, kvm->arch.vtcr);
> +
> +	return VTCR_EL2_TGRAN_SL0_BASE - sl0;
> +}
> +
> +static int realm_create_rd(struct kvm *kvm)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +	struct realm_params *params = realm->params;
> +	void *rd = NULL;
> +	phys_addr_t rd_phys, params_phys;
> +	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> +	unsigned int pgd_sz;
> +	int i, r;
> +
> +	if (WARN_ON(realm->rd) || WARN_ON(!realm->params))
> +		return -EEXIST;
> +
> +	rd = (void *)__get_free_page(GFP_KERNEL);
> +	if (!rd)
> +		return -ENOMEM;
> +
> +	rd_phys = virt_to_phys(rd);
> +	if (rmi_granule_delegate(rd_phys)) {
> +		r = -ENXIO;
> +		goto out;
> +	}
> +
> +	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
> +	for (i = 0; i < pgd_sz; i++) {
> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +		if (rmi_granule_delegate(pgd_phys)) {
> +			r = -ENXIO;
> +			goto out_undelegate_tables;
> +		}
> +	}
> +
> +	params->rtt_level_start = get_start_level(kvm);
> +	params->rtt_num_start = pgd_sz;
> +	params->rtt_base = kvm->arch.mmu.pgd_phys;
> +	params->vmid = realm->vmid;
> +
> +	params_phys = virt_to_phys(params);
> +
> +	if (rmi_realm_create(rd_phys, params_phys)) {
> +		r = -ENXIO;
> +		goto out_undelegate_tables;
> +	}
> +
> +	realm->rd = rd;
> +	realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
> +
> +	if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) {
> +		WARN_ON(rmi_realm_destroy(rd_phys));
> +		goto out_undelegate_tables;
> +	}
> +
> +	return 0;
> +
> +out_undelegate_tables:
> +	while (--i >= 0) {
> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +		WARN_ON(rmi_granule_undelegate(pgd_phys));
> +	}
> +	WARN_ON(rmi_granule_undelegate(rd_phys));
> +out:
> +	free_page((unsigned long)rd);
> +	return r;
> +}
> +
> +/* Protects access to rme_vmid_bitmap */
> +static DEFINE_SPINLOCK(rme_vmid_lock);
> +static unsigned long *rme_vmid_bitmap;
> +
> +static int rme_vmid_init(void)
> +{
> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
> +
> +	rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
> +	if (!rme_vmid_bitmap) {
> +		kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
> +		return -ENOMEM;
> +	}
> +
> +	return 0;
> +}
> +
> +static int rme_vmid_reserve(void)
> +{
> +	int ret;
> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
> +
> +	spin_lock(&rme_vmid_lock);
> +	ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
> +	spin_unlock(&rme_vmid_lock);
> +
> +	return ret;
> +}
> +
> +static void rme_vmid_release(unsigned int vmid)
> +{
> +	spin_lock(&rme_vmid_lock);
> +	bitmap_release_region(rme_vmid_bitmap, vmid, 0);
> +	spin_unlock(&rme_vmid_lock);
> +}
> +
> +static int kvm_create_realm(struct kvm *kvm)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +	int ret;
> +
> +	if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
> +		return -EEXIST;
> +
> +	ret = rme_vmid_reserve();
> +	if (ret < 0)
> +		return ret;
> +	realm->vmid = ret;
> +
> +	ret = realm_create_rd(kvm);
> +	if (ret) {
> +		rme_vmid_release(realm->vmid);
> +		return ret;
> +	}
> +
> +	WRITE_ONCE(realm->state, REALM_STATE_NEW);
> +
> +	/* The realm is up, free the parameters.  */
> +	free_page((unsigned long)realm->params);
> +	realm->params = NULL;
> +
> +	return 0;
> +}
> +
> +static int config_realm_hash_algo(struct realm *realm,
> +				  struct kvm_cap_arm_rme_config_item *cfg)
> +{
> +	switch (cfg->hash_algo) {
> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
> +			return -EINVAL;
> +		break;
> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
> +			return -EINVAL;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +	realm->params->measurement_algo = cfg->hash_algo;
> +	return 0;
> +}
> +
> +static int config_realm_sve(struct realm *realm,
> +			    struct kvm_cap_arm_rme_config_item *cfg)
> +{
> +	u64 features_0 = realm->params->features_0;
> +	int max_sve_vq = u64_get_bits(rmm_feat_reg0,
> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
> +
> +	if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
> +		return -EINVAL;
> +
> +	if (cfg->sve_vq > max_sve_vq)
> +		return -EINVAL;
> +
> +	features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
> +			RMI_FEATURE_REGISTER_0_SVE_VL);
> +	features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
> +	features_0 |= u64_encode_bits(cfg->sve_vq,
> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
> +
> +	realm->params->features_0 = features_0;
> +	return 0;
> +}
> +
> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +	struct kvm_cap_arm_rme_config_item cfg;
> +	struct realm *realm = &kvm->arch.realm;
> +	int r = 0;
> +
> +	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
> +		return -EBUSY;
> +
> +	if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
> +		return -EFAULT;
> +
> +	switch (cfg.cfg) {
> +	case KVM_CAP_ARM_RME_CFG_RPV:
> +		memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
> +		break;
> +	case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
> +		r = config_realm_hash_algo(realm, &cfg);
> +		break;
> +	case KVM_CAP_ARM_RME_CFG_SVE:
> +		r = config_realm_sve(realm, &cfg);
> +		break;
> +	default:
> +		r = -EINVAL;
> +	}
> +
> +	return r;
> +}
> +
> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +	int r = 0;
> +
> +	switch (cap->args[0]) {
> +	case KVM_CAP_ARM_RME_CONFIG_REALM:
> +		r = kvm_rme_config_realm(kvm, cap);
> +		break;
> +	case KVM_CAP_ARM_RME_CREATE_RD:
> +		if (kvm->created_vcpus) {
> +			r = -EBUSY;
> +			break;
> +		}
> +
> +		r = kvm_create_realm(kvm);
> +		break;
> +	default:
> +		r = -EINVAL;
> +		break;
> +	}
> +
> +	return r;
> +}
> +
> +void kvm_destroy_realm(struct kvm *kvm)
> +{
> +	struct realm *realm = &kvm->arch.realm;
> +	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
> +	unsigned int pgd_sz;
> +	int i;
> +
> +	if (realm->params) {
> +		free_page((unsigned long)realm->params);
> +		realm->params = NULL;
> +	}
> +
> +	if (kvm_realm_state(kvm) == REALM_STATE_NONE)
> +		return;
> +
> +	WRITE_ONCE(realm->state, REALM_STATE_DYING);
> +
> +	rme_vmid_release(realm->vmid);
> +
> +	if (realm->rd) {
> +		phys_addr_t rd_phys = virt_to_phys(realm->rd);
> +
> +		if (WARN_ON(rmi_realm_destroy(rd_phys)))
> +			return;
> +		if (WARN_ON(rmi_granule_undelegate(rd_phys)))
> +			return;
> +		free_page((unsigned long)realm->rd);
> +		realm->rd = NULL;
> +	}
> +
> +	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
> +	for (i = 0; i < pgd_sz; i++) {
> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
> +
> +		if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
> +			return;
> +	}
> +
> +	kvm_free_stage2_pgd(&kvm->arch.mmu);
> +}
> +
> +int kvm_init_realm_vm(struct kvm *kvm)
> +{
> +	struct realm_params *params;
> +
> +	params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
> +	if (!params)
> +		return -ENOMEM;
> +
> +	params->features_0 = create_realm_feat_reg0(kvm);
> +	kvm->arch.realm.params = params;
> +	return 0;
> +}
> +
>  int kvm_init_rme(void)
>  {
> +	int ret;
> +
>  	if (PAGE_SIZE != SZ_4K)
>  		/* Only 4k page size on the host is supported */
>  		return 0;
> @@ -43,6 +394,12 @@ int kvm_init_rme(void)
>  		/* Continue without realm support */
>  		return 0;
>  
> +	ret = rme_vmid_init();
> +	if (ret)
> +		return ret;
> +
> +	WARN_ON(rmi_features(0, &rmm_feat_reg0));
> +
>  	/* Future patch will enable static branch kvm_rme_is_available */
>  
>  	return 0;
  
Steven Price March 10, 2023, 3:47 p.m. UTC | #7
On 06/03/2023 19:10, Zhi Wang wrote:
> On Fri, 27 Jan 2023 11:29:10 +0000
> Steven Price <steven.price@arm.com> wrote:
> 
>> Add the KVM_CAP_ARM_RME_CREATE_RD ioctl to create a realm. This involves
>> delegating pages to the RMM to hold the Realm Descriptor (RD) and the
>> base level of the Realm Translation Tables (RTT). A VMID also needs to
>> be picked; since the RMM has a separate VMID address space, a dedicated
>> allocator is added for this purpose.
>>
>> KVM_CAP_ARM_RME_CONFIG_REALM is provided to allow configuring the realm
>> before it is created.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>>  arch/arm64/include/asm/kvm_rme.h |  14 ++
>>  arch/arm64/kvm/arm.c             |  19 ++
>>  arch/arm64/kvm/mmu.c             |   6 +
>>  arch/arm64/kvm/reset.c           |  33 +++
>>  arch/arm64/kvm/rme.c             | 357 +++++++++++++++++++++++++++++++
>>  5 files changed, 429 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
>> index c26bc2c6770d..055a22accc08 100644
>> --- a/arch/arm64/include/asm/kvm_rme.h
>> +++ b/arch/arm64/include/asm/kvm_rme.h
>> @@ -6,6 +6,8 @@
>>  #ifndef __ASM_KVM_RME_H
>>  #define __ASM_KVM_RME_H
>>  
>> +#include <uapi/linux/kvm.h>
>> +
>>  enum realm_state {
>>  	REALM_STATE_NONE,
>>  	REALM_STATE_NEW,
>> @@ -15,8 +17,20 @@ enum realm_state {
>>  
>>  struct realm {
>>  	enum realm_state state;
>> +
>> +	void *rd;
>> +	struct realm_params *params;
>> +
>> +	unsigned long num_aux;
>> +	unsigned int vmid;
>> +	unsigned int ia_bits;
>>  };
>>  
>>  int kvm_init_rme(void);
>> +u32 kvm_realm_ipa_limit(void);
>> +
>> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
>> +int kvm_init_realm_vm(struct kvm *kvm);
>> +void kvm_destroy_realm(struct kvm *kvm);
>>  
>>  #endif
>> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
>> index d97b39d042ab..50f54a63732a 100644
>> --- a/arch/arm64/kvm/arm.c
>> +++ b/arch/arm64/kvm/arm.c
>> @@ -103,6 +103,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>>  		r = 0;
>>  		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
>>  		break;
>> +	case KVM_CAP_ARM_RME:
>> +		if (!static_branch_unlikely(&kvm_rme_is_available))
>> +			return -EINVAL;
>> +		mutex_lock(&kvm->lock);
>> +		r = kvm_realm_enable_cap(kvm, cap);
>> +		mutex_unlock(&kvm->lock);
>> +		break;
>>  	default:
>>  		r = -EINVAL;
>>  		break;
>> @@ -172,6 +179,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
>>  	 */
>>  	kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
>>  
>> +	/* Initialise the realm bits after the generic bits are enabled */
>> +	if (kvm_is_realm(kvm)) {
>> +		ret = kvm_init_realm_vm(kvm);
>> +		if (ret)
>> +			goto err_free_cpumask;
>> +	}
>> +
>>  	return 0;
>>  
>>  err_free_cpumask:
>> @@ -204,6 +218,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>  	kvm_destroy_vcpus(kvm);
>>  
>>  	kvm_unshare_hyp(kvm, kvm + 1);
>> +
>> +	kvm_destroy_realm(kvm);
>>  }
>>  
>>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>> @@ -300,6 +316,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>  	case KVM_CAP_ARM_PTRAUTH_GENERIC:
>>  		r = system_has_full_ptr_auth();
>>  		break;
>> +	case KVM_CAP_ARM_RME:
>> +		r = static_key_enabled(&kvm_rme_is_available);
>> +		break;
>>  	default:
>>  		r = 0;
>>  	}
>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>> index 31d7fa4c7c14..d0f707767d05 100644
>> --- a/arch/arm64/kvm/mmu.c
>> +++ b/arch/arm64/kvm/mmu.c
>> @@ -840,6 +840,12 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>>  	struct kvm_pgtable *pgt = NULL;
>>  
>>  	write_lock(&kvm->mmu_lock);
>> +	if (kvm_is_realm(kvm) &&
>> +	    kvm_realm_state(kvm) != REALM_STATE_DYING) {
>> +		/* TODO: teardown rtts */
>> +		write_unlock(&kvm->mmu_lock);
>> +		return;
>> +	}
>>  	pgt = mmu->pgt;
>>  	if (pgt) {
>>  		mmu->pgd_phys = 0;
>> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
>> index e0267f672b8a..c165df174737 100644
>> --- a/arch/arm64/kvm/reset.c
>> +++ b/arch/arm64/kvm/reset.c
>> @@ -395,3 +395,36 @@ int kvm_set_ipa_limit(void)
>>  
>>  	return 0;
>>  }
>> +
> 
> The function below doesn't have a user in this patch. Also, it looks
> like it was partly copied from kvm_init_stage2_mmu() in
> arch/arm64/kvm/mmu.c.

Good spot ;) Yes, I discovered this; it should have been removed - it's
no longer used. I think this was an error when I was rebasing:
kvm_arm_setup_stage2() was removed in 315775ff7c6d ("KVM: arm64:
Consolidate stage-2 initialisation into a single function").

Steve

>> +int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
>> +{
>> +	u64 mmfr0, mmfr1;
>> +	u32 phys_shift;
>> +	u32 ipa_limit = kvm_ipa_limit;
>> +
>> +	if (kvm_is_realm(kvm))
>> +		ipa_limit = kvm_realm_ipa_limit();
>> +
>> +	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
>> +		return -EINVAL;
>> +
>> +	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
>> +	if (phys_shift) {
>> +		if (phys_shift > ipa_limit ||
>> +		    phys_shift < ARM64_MIN_PARANGE_BITS)
>> +			return -EINVAL;
>> +	} else {
>> +		phys_shift = KVM_PHYS_SHIFT;
>> +		if (phys_shift > ipa_limit) {
>> +			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
>> +				     current->comm);
>> +			return -EINVAL;
>> +		}
>> +	}
>> +
>> +	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
>> +	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
>> +	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
>> +
>> +	return 0;
>> +}
>> diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
>> index f6b587bc116e..9f8c5a91b8fc 100644
>> --- a/arch/arm64/kvm/rme.c
>> +++ b/arch/arm64/kvm/rme.c
>> @@ -5,9 +5,49 @@
>>  
>>  #include <linux/kvm_host.h>
>>  
>> +#include <asm/kvm_emulate.h>
>> +#include <asm/kvm_mmu.h>
>>  #include <asm/rmi_cmds.h>
>>  #include <asm/virt.h>
>>  
>> +/************ FIXME: Copied from kvm/hyp/pgtable.c **********/
>> +#include <asm/kvm_pgtable.h>
>> +
>> +struct kvm_pgtable_walk_data {
>> +	struct kvm_pgtable		*pgt;
>> +	struct kvm_pgtable_walker	*walker;
>> +
>> +	u64				addr;
>> +	u64				end;
>> +};
>> +
>> +static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
>> +{
>> +	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
>> +	u64 mask = BIT(pgt->ia_bits) - 1;
>> +
>> +	return (addr & mask) >> shift;
>> +}
>> +
>> +static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
>> +{
>> +	struct kvm_pgtable pgt = {
>> +		.ia_bits	= ia_bits,
>> +		.start_level	= start_level,
>> +	};
>> +
>> +	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
>> +}
>> +
>> +/******************/
>> +
>> +static unsigned long rmm_feat_reg0;
>> +
>> +static bool rme_supports(unsigned long feature)
>> +{
>> +	return !!u64_get_bits(rmm_feat_reg0, feature);
>> +}
>> +
>>  static int rmi_check_version(void)
>>  {
>>  	struct arm_smccc_res res;
>> @@ -33,8 +73,319 @@ static int rmi_check_version(void)
>>  	return 0;
>>  }
>>  
>> +static unsigned long create_realm_feat_reg0(struct kvm *kvm)
>> +{
>> +	unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
>> +	u64 feat_reg0 = 0;
>> +
>> +	int num_bps = u64_get_bits(rmm_feat_reg0,
>> +				   RMI_FEATURE_REGISTER_0_NUM_BPS);
>> +	int num_wps = u64_get_bits(rmm_feat_reg0,
>> +				   RMI_FEATURE_REGISTER_0_NUM_WPS);
>> +
>> +	feat_reg0 |= u64_encode_bits(ia_bits, RMI_FEATURE_REGISTER_0_S2SZ);
>> +	feat_reg0 |= u64_encode_bits(num_bps, RMI_FEATURE_REGISTER_0_NUM_BPS);
>> +	feat_reg0 |= u64_encode_bits(num_wps, RMI_FEATURE_REGISTER_0_NUM_WPS);
>> +
>> +	return feat_reg0;
>> +}
>> +
>> +u32 kvm_realm_ipa_limit(void)
>> +{
>> +	return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
>> +}
>> +
>> +static u32 get_start_level(struct kvm *kvm)
>> +{
>> +	long sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, kvm->arch.vtcr);
>> +
>> +	return VTCR_EL2_TGRAN_SL0_BASE - sl0;
>> +}
>> +
>> +static int realm_create_rd(struct kvm *kvm)
>> +{
>> +	struct realm *realm = &kvm->arch.realm;
>> +	struct realm_params *params = realm->params;
>> +	void *rd = NULL;
>> +	phys_addr_t rd_phys, params_phys;
>> +	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
>> +	unsigned int pgd_sz;
>> +	int i, r = -ENXIO;	/* default: RMI call failure */
>> +
>> +	if (WARN_ON(realm->rd) || WARN_ON(!realm->params))
>> +		return -EEXIST;
>> +
>> +	rd = (void *)__get_free_page(GFP_KERNEL);
>> +	if (!rd)
>> +		return -ENOMEM;
>> +
>> +	rd_phys = virt_to_phys(rd);
>> +	if (rmi_granule_delegate(rd_phys)) {
>> +		r = -ENXIO;
>> +		goto out;
>> +	}
>> +
>> +	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
>> +	for (i = 0; i < pgd_sz; i++) {
>> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
>> +
>> +		if (rmi_granule_delegate(pgd_phys)) {
>> +			r = -ENXIO;
>> +			goto out_undelegate_tables;
>> +		}
>> +	}
>> +
>> +	params->rtt_level_start = get_start_level(kvm);
>> +	params->rtt_num_start = pgd_sz;
>> +	params->rtt_base = kvm->arch.mmu.pgd_phys;
>> +	params->vmid = realm->vmid;
>> +
>> +	params_phys = virt_to_phys(params);
>> +
>> +	if (rmi_realm_create(rd_phys, params_phys)) {
>> +		r = -ENXIO;
>> +		goto out_undelegate_tables;
>> +	}
>> +
>> +	realm->rd = rd;
>> +	realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
>> +
>> +	if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) {
>> +		WARN_ON(rmi_realm_destroy(rd_phys));
>> +		goto out_undelegate_tables;
>> +	}
>> +
>> +	return 0;
>> +
>> +out_undelegate_tables:
>> +	while (--i >= 0) {
>> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
>> +
>> +		WARN_ON(rmi_granule_undelegate(pgd_phys));
>> +	}
>> +	WARN_ON(rmi_granule_undelegate(rd_phys));
>> +out:
>> +	free_page((unsigned long)rd);
>> +	return r;
>> +}
>> +
>> +/* Protects access to rme_vmid_bitmap */
>> +static DEFINE_SPINLOCK(rme_vmid_lock);
>> +static unsigned long *rme_vmid_bitmap;
>> +
>> +static int rme_vmid_init(void)
>> +{
>> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
>> +
>> +	rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
>> +	if (!rme_vmid_bitmap) {
>> +		kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
>> +		return -ENOMEM;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int rme_vmid_reserve(void)
>> +{
>> +	int ret;
>> +	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
>> +
>> +	spin_lock(&rme_vmid_lock);
>> +	ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
>> +	spin_unlock(&rme_vmid_lock);
>> +
>> +	return ret;
>> +}
>> +
>> +static void rme_vmid_release(unsigned int vmid)
>> +{
>> +	spin_lock(&rme_vmid_lock);
>> +	bitmap_release_region(rme_vmid_bitmap, vmid, 0);
>> +	spin_unlock(&rme_vmid_lock);
>> +}
>> +
>> +static int kvm_create_realm(struct kvm *kvm)
>> +{
>> +	struct realm *realm = &kvm->arch.realm;
>> +	int ret;
>> +
>> +	if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
>> +		return -EEXIST;
>> +
>> +	ret = rme_vmid_reserve();
>> +	if (ret < 0)
>> +		return ret;
>> +	realm->vmid = ret;
>> +
>> +	ret = realm_create_rd(kvm);
>> +	if (ret) {
>> +		rme_vmid_release(realm->vmid);
>> +		return ret;
>> +	}
>> +
>> +	WRITE_ONCE(realm->state, REALM_STATE_NEW);
>> +
>> +	/* The realm is up, free the parameters.  */
>> +	free_page((unsigned long)realm->params);
>> +	realm->params = NULL;
>> +
>> +	return 0;
>> +}
>> +
>> +static int config_realm_hash_algo(struct realm *realm,
>> +				  struct kvm_cap_arm_rme_config_item *cfg)
>> +{
>> +	switch (cfg->hash_algo) {
>> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
>> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
>> +			return -EINVAL;
>> +		break;
>> +	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
>> +		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
>> +			return -EINVAL;
>> +		break;
>> +	default:
>> +		return -EINVAL;
>> +	}
>> +	realm->params->measurement_algo = cfg->hash_algo;
>> +	return 0;
>> +}
>> +
>> +static int config_realm_sve(struct realm *realm,
>> +			    struct kvm_cap_arm_rme_config_item *cfg)
>> +{
>> +	u64 features_0 = realm->params->features_0;
>> +	int max_sve_vq = u64_get_bits(rmm_feat_reg0,
>> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
>> +
>> +	if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
>> +		return -EINVAL;
>> +
>> +	if (cfg->sve_vq > max_sve_vq)
>> +		return -EINVAL;
>> +
>> +	features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
>> +			RMI_FEATURE_REGISTER_0_SVE_VL);
>> +	features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
>> +	features_0 |= u64_encode_bits(cfg->sve_vq,
>> +				      RMI_FEATURE_REGISTER_0_SVE_VL);
>> +
>> +	realm->params->features_0 = features_0;
>> +	return 0;
>> +}
>> +
>> +static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
>> +{
>> +	struct kvm_cap_arm_rme_config_item cfg;
>> +	struct realm *realm = &kvm->arch.realm;
>> +	int r = 0;
>> +
>> +	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
>> +		return -EBUSY;
>> +
>> +	if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
>> +		return -EFAULT;
>> +
>> +	switch (cfg.cfg) {
>> +	case KVM_CAP_ARM_RME_CFG_RPV:
>> +		memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
>> +		break;
>> +	case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
>> +		r = config_realm_hash_algo(realm, &cfg);
>> +		break;
>> +	case KVM_CAP_ARM_RME_CFG_SVE:
>> +		r = config_realm_sve(realm, &cfg);
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +	}
>> +
>> +	return r;
>> +}
>> +
>> +int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>> +{
>> +	int r = 0;
>> +
>> +	switch (cap->args[0]) {
>> +	case KVM_CAP_ARM_RME_CONFIG_REALM:
>> +		r = kvm_rme_config_realm(kvm, cap);
>> +		break;
>> +	case KVM_CAP_ARM_RME_CREATE_RD:
>> +		if (kvm->created_vcpus) {
>> +			r = -EBUSY;
>> +			break;
>> +		}
>> +
>> +		r = kvm_create_realm(kvm);
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +		break;
>> +	}
>> +
>> +	return r;
>> +}
>> +
>> +void kvm_destroy_realm(struct kvm *kvm)
>> +{
>> +	struct realm *realm = &kvm->arch.realm;
>> +	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
>> +	unsigned int pgd_sz;
>> +	int i;
>> +
>> +	if (realm->params) {
>> +		free_page((unsigned long)realm->params);
>> +		realm->params = NULL;
>> +	}
>> +
>> +	if (kvm_realm_state(kvm) == REALM_STATE_NONE)
>> +		return;
>> +
>> +	WRITE_ONCE(realm->state, REALM_STATE_DYING);
>> +
>> +	rme_vmid_release(realm->vmid);
>> +
>> +	if (realm->rd) {
>> +		phys_addr_t rd_phys = virt_to_phys(realm->rd);
>> +
>> +		if (WARN_ON(rmi_realm_destroy(rd_phys)))
>> +			return;
>> +		if (WARN_ON(rmi_granule_undelegate(rd_phys)))
>> +			return;
>> +		free_page((unsigned long)realm->rd);
>> +		realm->rd = NULL;
>> +	}
>> +
>> +	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
>> +	for (i = 0; i < pgd_sz; i++) {
>> +		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
>> +
>> +		if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
>> +			return;
>> +	}
>> +
>> +	kvm_free_stage2_pgd(&kvm->arch.mmu);
>> +}
>> +
>> +int kvm_init_realm_vm(struct kvm *kvm)
>> +{
>> +	struct realm_params *params;
>> +
>> +	params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
>> +	if (!params)
>> +		return -ENOMEM;
>> +
>> +	params->features_0 = create_realm_feat_reg0(kvm);
>> +	kvm->arch.realm.params = params;
>> +	return 0;
>> +}
>> +
>>  int kvm_init_rme(void)
>>  {
>> +	int ret;
>> +
>>  	if (PAGE_SIZE != SZ_4K)
>>  		/* Only 4k page size on the host is supported */
>>  		return 0;
>> @@ -43,6 +394,12 @@ int kvm_init_rme(void)
>>  		/* Continue without realm support */
>>  		return 0;
>>  
>> +	ret = rme_vmid_init();
>> +	if (ret)
>> +		return ret;
>> +
>> +	WARN_ON(rmi_features(0, &rmm_feat_reg0));
>> +
>>  	/* Future patch will enable static branch kvm_rme_is_available */
>>  
>>  	return 0;
>
  

Patch

diff --git a/arch/arm64/include/asm/kvm_rme.h b/arch/arm64/include/asm/kvm_rme.h
index c26bc2c6770d..055a22accc08 100644
--- a/arch/arm64/include/asm/kvm_rme.h
+++ b/arch/arm64/include/asm/kvm_rme.h
@@ -6,6 +6,8 @@ 
 #ifndef __ASM_KVM_RME_H
 #define __ASM_KVM_RME_H
 
+#include <uapi/linux/kvm.h>
+
 enum realm_state {
 	REALM_STATE_NONE,
 	REALM_STATE_NEW,
@@ -15,8 +17,20 @@  enum realm_state {
 
 struct realm {
 	enum realm_state state;
+
+	void *rd;
+	struct realm_params *params;
+
+	unsigned long num_aux;
+	unsigned int vmid;
+	unsigned int ia_bits;
 };
 
 int kvm_init_rme(void);
+u32 kvm_realm_ipa_limit(void);
+
+int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
+int kvm_init_realm_vm(struct kvm *kvm);
+void kvm_destroy_realm(struct kvm *kvm);
 
 #endif
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index d97b39d042ab..50f54a63732a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -103,6 +103,13 @@  int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		r = 0;
 		set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
 		break;
+	case KVM_CAP_ARM_RME:
+		if (!static_branch_unlikely(&kvm_rme_is_available))
+			return -EINVAL;
+		mutex_lock(&kvm->lock);
+		r = kvm_realm_enable_cap(kvm, cap);
+		mutex_unlock(&kvm->lock);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -172,6 +179,13 @@  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	 */
 	kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
 
+	/* Initialise the realm bits after the generic bits are enabled */
+	if (kvm_is_realm(kvm)) {
+		ret = kvm_init_realm_vm(kvm);
+		if (ret)
+			goto err_free_cpumask;
+	}
+
 	return 0;
 
 err_free_cpumask:
@@ -204,6 +218,8 @@  void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_destroy_vcpus(kvm);
 
 	kvm_unshare_hyp(kvm, kvm + 1);
+
+	kvm_destroy_realm(kvm);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -300,6 +316,9 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_ARM_PTRAUTH_GENERIC:
 		r = system_has_full_ptr_auth();
 		break;
+	case KVM_CAP_ARM_RME:
+		r = static_key_enabled(&kvm_rme_is_available);
+		break;
 	default:
 		r = 0;
 	}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 31d7fa4c7c14..d0f707767d05 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -840,6 +840,12 @@  void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 	struct kvm_pgtable *pgt = NULL;
 
 	write_lock(&kvm->mmu_lock);
+	if (kvm_is_realm(kvm) &&
+	    kvm_realm_state(kvm) != REALM_STATE_DYING) {
+		/* TODO: teardown rtts */
+		write_unlock(&kvm->mmu_lock);
+		return;
+	}
 	pgt = mmu->pgt;
 	if (pgt) {
 		mmu->pgd_phys = 0;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index e0267f672b8a..c165df174737 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -395,3 +395,36 @@  int kvm_set_ipa_limit(void)
 
 	return 0;
 }
+
+int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
+{
+	u64 mmfr0, mmfr1;
+	u32 phys_shift;
+	u32 ipa_limit = kvm_ipa_limit;
+
+	if (kvm_is_realm(kvm))
+		ipa_limit = kvm_realm_ipa_limit();
+
+	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+		return -EINVAL;
+
+	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
+	if (phys_shift) {
+		if (phys_shift > ipa_limit ||
+		    phys_shift < ARM64_MIN_PARANGE_BITS)
+			return -EINVAL;
+	} else {
+		phys_shift = KVM_PHYS_SHIFT;
+		if (phys_shift > ipa_limit) {
+			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
+				     current->comm);
+			return -EINVAL;
+		}
+	}
+
+	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
+
+	return 0;
+}
diff --git a/arch/arm64/kvm/rme.c b/arch/arm64/kvm/rme.c
index f6b587bc116e..9f8c5a91b8fc 100644
--- a/arch/arm64/kvm/rme.c
+++ b/arch/arm64/kvm/rme.c
@@ -5,9 +5,49 @@ 
 
 #include <linux/kvm_host.h>
 
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_mmu.h>
 #include <asm/rmi_cmds.h>
 #include <asm/virt.h>
 
+/************ FIXME: Copied from kvm/hyp/pgtable.c **********/
+#include <asm/kvm_pgtable.h>
+
+struct kvm_pgtable_walk_data {
+	struct kvm_pgtable		*pgt;
+	struct kvm_pgtable_walker	*walker;
+
+	u64				addr;
+	u64				end;
+};
+
+static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
+{
+	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
+	u64 mask = BIT(pgt->ia_bits) - 1;
+
+	return (addr & mask) >> shift;
+}
+
+static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
+{
+	struct kvm_pgtable pgt = {
+		.ia_bits	= ia_bits,
+		.start_level	= start_level,
+	};
+
+	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
+}
+
+/******************/
+
+static unsigned long rmm_feat_reg0;
+
+static bool rme_supports(unsigned long feature)
+{
+	return !!u64_get_bits(rmm_feat_reg0, feature);
+}
+
 static int rmi_check_version(void)
 {
 	struct arm_smccc_res res;
@@ -33,8 +73,319 @@  static int rmi_check_version(void)
 	return 0;
 }
 
+static unsigned long create_realm_feat_reg0(struct kvm *kvm)
+{
+	unsigned long ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
+	u64 feat_reg0 = 0;
+
+	int num_bps = u64_get_bits(rmm_feat_reg0,
+				   RMI_FEATURE_REGISTER_0_NUM_BPS);
+	int num_wps = u64_get_bits(rmm_feat_reg0,
+				   RMI_FEATURE_REGISTER_0_NUM_WPS);
+
+	feat_reg0 |= u64_encode_bits(ia_bits, RMI_FEATURE_REGISTER_0_S2SZ);
+	feat_reg0 |= u64_encode_bits(num_bps, RMI_FEATURE_REGISTER_0_NUM_BPS);
+	feat_reg0 |= u64_encode_bits(num_wps, RMI_FEATURE_REGISTER_0_NUM_WPS);
+
+	return feat_reg0;
+}
+
+u32 kvm_realm_ipa_limit(void)
+{
+	return u64_get_bits(rmm_feat_reg0, RMI_FEATURE_REGISTER_0_S2SZ);
+}
+
+static u32 get_start_level(struct kvm *kvm)
+{
+	long sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, kvm->arch.vtcr);
+
+	return VTCR_EL2_TGRAN_SL0_BASE - sl0;
+}
+
+static int realm_create_rd(struct kvm *kvm)
+{
+	struct realm *realm = &kvm->arch.realm;
+	struct realm_params *params = realm->params;
+	void *rd = NULL;
+	phys_addr_t rd_phys, params_phys;
+	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+	unsigned int pgd_sz;
+	int i, r = -ENXIO;	/* default: RMI call failure */
+
+	if (WARN_ON(realm->rd) || WARN_ON(!realm->params))
+		return -EEXIST;
+
+	rd = (void *)__get_free_page(GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	rd_phys = virt_to_phys(rd);
+	if (rmi_granule_delegate(rd_phys)) {
+		r = -ENXIO;
+		goto out;
+	}
+
+	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
+	for (i = 0; i < pgd_sz; i++) {
+		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
+
+		if (rmi_granule_delegate(pgd_phys)) {
+			r = -ENXIO;
+			goto out_undelegate_tables;
+		}
+	}
+
+	params->rtt_level_start = get_start_level(kvm);
+	params->rtt_num_start = pgd_sz;
+	params->rtt_base = kvm->arch.mmu.pgd_phys;
+	params->vmid = realm->vmid;
+
+	params_phys = virt_to_phys(params);
+
+	if (rmi_realm_create(rd_phys, params_phys)) {
+		r = -ENXIO;
+		goto out_undelegate_tables;
+	}
+
+	realm->rd = rd;
+	realm->ia_bits = VTCR_EL2_IPA(kvm->arch.vtcr);
+
+	if (WARN_ON(rmi_rec_aux_count(rd_phys, &realm->num_aux))) {
+		WARN_ON(rmi_realm_destroy(rd_phys));
+		goto out_undelegate_tables;
+	}
+
+	return 0;
+
+out_undelegate_tables:
+	while (--i >= 0) {
+		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
+
+		WARN_ON(rmi_granule_undelegate(pgd_phys));
+	}
+	WARN_ON(rmi_granule_undelegate(rd_phys));
+out:
+	free_page((unsigned long)rd);
+	return r;
+}
+
+/* Protects access to rme_vmid_bitmap */
+static DEFINE_SPINLOCK(rme_vmid_lock);
+static unsigned long *rme_vmid_bitmap;
+
+static int rme_vmid_init(void)
+{
+	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
+
+	rme_vmid_bitmap = bitmap_zalloc(vmid_count, GFP_KERNEL);
+	if (!rme_vmid_bitmap) {
+		kvm_err("%s: Couldn't allocate rme vmid bitmap\n", __func__);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int rme_vmid_reserve(void)
+{
+	int ret;
+	unsigned int vmid_count = 1 << kvm_get_vmid_bits();
+
+	spin_lock(&rme_vmid_lock);
+	ret = bitmap_find_free_region(rme_vmid_bitmap, vmid_count, 0);
+	spin_unlock(&rme_vmid_lock);
+
+	return ret;
+}
+
+static void rme_vmid_release(unsigned int vmid)
+{
+	spin_lock(&rme_vmid_lock);
+	bitmap_release_region(rme_vmid_bitmap, vmid, 0);
+	spin_unlock(&rme_vmid_lock);
+}
+
+static int kvm_create_realm(struct kvm *kvm)
+{
+	struct realm *realm = &kvm->arch.realm;
+	int ret;
+
+	if (!kvm_is_realm(kvm) || kvm_realm_state(kvm) != REALM_STATE_NONE)
+		return -EEXIST;
+
+	ret = rme_vmid_reserve();
+	if (ret < 0)
+		return ret;
+	realm->vmid = ret;
+
+	ret = realm_create_rd(kvm);
+	if (ret) {
+		rme_vmid_release(realm->vmid);
+		return ret;
+	}
+
+	WRITE_ONCE(realm->state, REALM_STATE_NEW);
+
+	/* The realm is up, free the parameters.  */
+	free_page((unsigned long)realm->params);
+	realm->params = NULL;
+
+	return 0;
+}
+
+static int config_realm_hash_algo(struct realm *realm,
+				  struct kvm_cap_arm_rme_config_item *cfg)
+{
+	switch (cfg->hash_algo) {
+	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256:
+		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_256))
+			return -EINVAL;
+		break;
+	case KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA512:
+		if (!rme_supports(RMI_FEATURE_REGISTER_0_HASH_SHA_512))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+	realm->params->measurement_algo = cfg->hash_algo;
+	return 0;
+}
+
+static int config_realm_sve(struct realm *realm,
+			    struct kvm_cap_arm_rme_config_item *cfg)
+{
+	u64 features_0 = realm->params->features_0;
+	int max_sve_vq = u64_get_bits(rmm_feat_reg0,
+				      RMI_FEATURE_REGISTER_0_SVE_VL);
+
+	if (!rme_supports(RMI_FEATURE_REGISTER_0_SVE_EN))
+		return -EINVAL;
+
+	if (cfg->sve_vq > max_sve_vq)
+		return -EINVAL;
+
+	features_0 &= ~(RMI_FEATURE_REGISTER_0_SVE_EN |
+			RMI_FEATURE_REGISTER_0_SVE_VL);
+	features_0 |= u64_encode_bits(1, RMI_FEATURE_REGISTER_0_SVE_EN);
+	features_0 |= u64_encode_bits(cfg->sve_vq,
+				      RMI_FEATURE_REGISTER_0_SVE_VL);
+
+	realm->params->features_0 = features_0;
+	return 0;
+}
+
+static int kvm_rme_config_realm(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	struct kvm_cap_arm_rme_config_item cfg;
+	struct realm *realm = &kvm->arch.realm;
+	int r = 0;
+
+	if (kvm_realm_state(kvm) != REALM_STATE_NONE)
+		return -EBUSY;
+
+	if (copy_from_user(&cfg, (void __user *)cap->args[1], sizeof(cfg)))
+		return -EFAULT;
+
+	switch (cfg.cfg) {
+	case KVM_CAP_ARM_RME_CFG_RPV:
+		memcpy(&realm->params->rpv, &cfg.rpv, sizeof(cfg.rpv));
+		break;
+	case KVM_CAP_ARM_RME_CFG_HASH_ALGO:
+		r = config_realm_hash_algo(realm, &cfg);
+		break;
+	case KVM_CAP_ARM_RME_CFG_SVE:
+		r = config_realm_sve(realm, &cfg);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+int kvm_realm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	int r = 0;
+
+	switch (cap->args[0]) {
+	case KVM_CAP_ARM_RME_CONFIG_REALM:
+		r = kvm_rme_config_realm(kvm, cap);
+		break;
+	case KVM_CAP_ARM_RME_CREATE_RD:
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+			break;
+		}
+
+		r = kvm_create_realm(kvm);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
+void kvm_destroy_realm(struct kvm *kvm)
+{
+	struct realm *realm = &kvm->arch.realm;
+	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
+	unsigned int pgd_sz;
+	int i;
+
+	if (realm->params) {
+		free_page((unsigned long)realm->params);
+		realm->params = NULL;
+	}
+
+	if (kvm_realm_state(kvm) == REALM_STATE_NONE)
+		return;
+
+	WRITE_ONCE(realm->state, REALM_STATE_DYING);
+
+	rme_vmid_release(realm->vmid);
+
+	if (realm->rd) {
+		phys_addr_t rd_phys = virt_to_phys(realm->rd);
+
+		if (WARN_ON(rmi_realm_destroy(rd_phys)))
+			return;
+		if (WARN_ON(rmi_granule_undelegate(rd_phys)))
+			return;
+		free_page((unsigned long)realm->rd);
+		realm->rd = NULL;
+	}
+
+	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level);
+	for (i = 0; i < pgd_sz; i++) {
+		phys_addr_t pgd_phys = kvm->arch.mmu.pgd_phys + i * PAGE_SIZE;
+
+		if (WARN_ON(rmi_granule_undelegate(pgd_phys)))
+			return;
+	}
+
+	kvm_free_stage2_pgd(&kvm->arch.mmu);
+}
+
+int kvm_init_realm_vm(struct kvm *kvm)
+{
+	struct realm_params *params;
+
+	params = (struct realm_params *)get_zeroed_page(GFP_KERNEL);
+	if (!params)
+		return -ENOMEM;
+
+	params->features_0 = create_realm_feat_reg0(kvm);
+	kvm->arch.realm.params = params;
+	return 0;
+}
+
 int kvm_init_rme(void)
 {
+	int ret;
+
 	if (PAGE_SIZE != SZ_4K)
 		/* Only 4k page size on the host is supported */
 		return 0;
@@ -43,6 +394,12 @@  int kvm_init_rme(void)
 		/* Continue without realm support */
 		return 0;
 
+	ret = rme_vmid_init();
+	if (ret)
+		return ret;
+
+	WARN_ON(rmi_features(0, &rmm_feat_reg0));
+
 	/* Future patch will enable static branch kvm_rme_is_available */
 
 	return 0;
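
For reference, a VMM would drive the new ioctls roughly as below. This
is an untested sketch: KVM_CAP_ARM_RME and struct
kvm_cap_arm_rme_config_item come from the UAPI additions earlier in the
series, and vm_fd is assumed to be a KVM VM fd created with the realm
VM type:

	struct kvm_cap_arm_rme_config_item cfg = {
		.cfg		= KVM_CAP_ARM_RME_CFG_HASH_ALGO,
		.hash_algo	= KVM_CAP_ARM_RME_MEASUREMENT_ALGO_SHA256,
	};
	struct kvm_enable_cap cap = {
		.cap		= KVM_CAP_ARM_RME,
		.args[0]	= KVM_CAP_ARM_RME_CONFIG_REALM,
		.args[1]	= (__u64)(unsigned long)&cfg,
	};

	/* Configuration is only accepted while the realm is in REALM_STATE_NONE */
	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		err(1, "KVM_CAP_ARM_RME_CONFIG_REALM");

	/* Creating the Realm Descriptor must happen before any vCPUs exist */
	cap.args[0] = KVM_CAP_ARM_RME_CREATE_RD;
	cap.args[1] = 0;
	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap))
		err(1, "KVM_CAP_ARM_RME_CREATE_RD");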