[v2,11/11] RISC-V: KVM: Implement firmware events

Message ID 20221215170046.2010255-12-atishp@rivosinc.com
State New
Headers
Series KVM perf support |

Commit Message

Atish Patra Dec. 15, 2022, 5 p.m. UTC
  SBI PMU extension defines a set of firmware events which can provide
useful information to guests about number of SBI calls. As hypervisor
implements the SBI PMU extension, these firmware events corresponds
to ecall invocations between VS->HS mode. All other firmware events
will always report zero if monitored as KVM doesn't implement them.

Signed-off-by: Atish Patra <atishp@rivosinc.com>
---
 arch/riscv/include/asm/kvm_vcpu_pmu.h |  16 ++++
 arch/riscv/include/asm/sbi.h          |   2 +-
 arch/riscv/kvm/tlb.c                  |   6 +-
 arch/riscv/kvm/vcpu_pmu.c             | 105 ++++++++++++++++++++++----
 arch/riscv/kvm/vcpu_sbi_replace.c     |   7 ++
 5 files changed, 119 insertions(+), 17 deletions(-)
  

Comments

Andrew Jones Jan. 13, 2023, 12:08 p.m. UTC | #1
On Thu, Dec 15, 2022 at 09:00:46AM -0800, Atish Patra wrote:
> SBI PMU extension defines a set of firmware events which can provide
> useful information to guests about number of SBI calls. As hypervisor
> implements the SBI PMU extension, these firmware events corresponds
> to ecall invocations between VS->HS mode. All other firmware events
> will always report zero if monitored as KVM doesn't implement them.
> 
> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> ---
>  arch/riscv/include/asm/kvm_vcpu_pmu.h |  16 ++++
>  arch/riscv/include/asm/sbi.h          |   2 +-
>  arch/riscv/kvm/tlb.c                  |   6 +-
>  arch/riscv/kvm/vcpu_pmu.c             | 105 ++++++++++++++++++++++----
>  arch/riscv/kvm/vcpu_sbi_replace.c     |   7 ++
>  5 files changed, 119 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
> index 7a9a8e6..cccc6182 100644
> --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
> +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
> @@ -17,6 +17,14 @@
>  #define RISCV_KVM_MAX_FW_CTRS 32
>  #define RISCV_MAX_COUNTERS      64
>  
> +struct kvm_fw_event {
> +	/* Current value of the event */
> +	unsigned long value;
> +
> +	/* Event monitoring status */
> +	bool started;
> +};
> +
>  /* Per virtual pmu counter data */
>  struct kvm_pmc {
>  	u8 idx;
> @@ -25,11 +33,14 @@ struct kvm_pmc {
>  	union sbi_pmu_ctr_info cinfo;
>  	/* Event monitoring status */
>  	bool started;
> +	/* Monitoring event ID */
> +	unsigned long event_idx;
>  };
>  
>  /* PMU data structure per vcpu */
>  struct kvm_pmu {
>  	struct kvm_pmc pmc[RISCV_MAX_COUNTERS];
> +	struct kvm_fw_event fw_event[RISCV_KVM_MAX_FW_CTRS];
>  	/* Number of the virtual firmware counters available */
>  	int num_fw_ctrs;
>  	/* Number of the virtual hardware counters available */
> @@ -52,6 +63,7 @@ struct kvm_pmu {
>  { .base = CSR_CYCLE,      .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm },
>  #endif
>  
> +int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid);
>  int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
>  				unsigned long *val, unsigned long new_val,
>  				unsigned long wr_mask);
> @@ -81,6 +93,10 @@ struct kvm_pmu {
>  #define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
>  { .base = 0,      .count = 0, .func = NULL },
>  
> +static inline int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
> +{
> +	return 0;
> +}
>  
>  static inline int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
>  {
> diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
> index 2a0ef738..a192a95a 100644
> --- a/arch/riscv/include/asm/sbi.h
> +++ b/arch/riscv/include/asm/sbi.h
> @@ -171,7 +171,7 @@ enum sbi_pmu_fw_generic_events_t {
>  	SBI_PMU_FW_IPI_SENT		= 6,
>  	SBI_PMU_FW_IPI_RECVD		= 7,
>  	SBI_PMU_FW_FENCE_I_SENT		= 8,
> -	SBI_PMU_FW_FENCE_I_RECVD	= 9,
> +	SBI_PMU_FW_FENCE_I_RCVD		= 9,

This should probably be in its own patch.

>  	SBI_PMU_FW_SFENCE_VMA_SENT	= 10,
>  	SBI_PMU_FW_SFENCE_VMA_RCVD	= 11,
>  	SBI_PMU_FW_SFENCE_VMA_ASID_SENT	= 12,
> diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
> index 309d79b..de81920 100644
> --- a/arch/riscv/kvm/tlb.c
> +++ b/arch/riscv/kvm/tlb.c
> @@ -181,6 +181,7 @@ void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu)
>  
>  void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
>  {
> +	kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_RCVD);
>  	local_flush_icache_all();
>  }
>  
> @@ -264,15 +265,18 @@ void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
>  						d.addr, d.size, d.order);
>  			break;
>  		case KVM_RISCV_HFENCE_VVMA_ASID_GVA:
> +			kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
>  			kvm_riscv_local_hfence_vvma_asid_gva(
>  						READ_ONCE(v->vmid), d.asid,
>  						d.addr, d.size, d.order);
>  			break;
>  		case KVM_RISCV_HFENCE_VVMA_ASID_ALL:
> +			kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
>  			kvm_riscv_local_hfence_vvma_asid_all(
>  						READ_ONCE(v->vmid), d.asid);
>  			break;
>  		case KVM_RISCV_HFENCE_VVMA_GVA:
> +			kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD);
>  			kvm_riscv_local_hfence_vvma_gva(
>  						READ_ONCE(v->vmid),
>  						d.addr, d.size, d.order);
> @@ -323,7 +327,7 @@ void kvm_riscv_fence_i(struct kvm *kvm,
>  		       unsigned long hbase, unsigned long hmask)
>  {
>  	make_xfence_request(kvm, hbase, hmask, KVM_REQ_FENCE_I,
> -			    KVM_REQ_FENCE_I, NULL);
> +		    KVM_REQ_FENCE_I, NULL);

stray change, and whitespace was correct before

>  }
>  
>  void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
> diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
> index 21c1f0f..a64a7ae 100644
> --- a/arch/riscv/kvm/vcpu_pmu.c
> +++ b/arch/riscv/kvm/vcpu_pmu.c
> @@ -170,18 +170,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
>  	return pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
>  }
>  
> +int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
> +{
> +	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
> +	struct kvm_fw_event *fevent;
> +
> +	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
> +		return -EINVAL;
> +
> +	fevent = &kvpmu->fw_event[fid];
> +	if (fevent->started)
> +		fevent->value++;
> +
> +	return 0;
> +}
> +
>  static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
>  			unsigned long *out_val)
>  {
>  	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
>  	struct kvm_pmc *pmc;
>  	u64 enabled, running;
> +	int fevent_code;
>  
>  	pmc = &kvpmu->pmc[cidx];
> -	if (!pmc->perf_event)
> -		return -EINVAL;
>  
> -	pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
> +	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
> +		fevent_code = get_event_code(pmc->event_idx);
> +		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
> +	} else if (pmc->perf_event)
> +		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
>  	*out_val = pmc->counter_val;
>  
>  	return 0;
> @@ -238,6 +256,7 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
>  	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
>  	int i, num_ctrs, pmc_index, sbiret = 0;
>  	struct kvm_pmc *pmc;
> +	int fevent_code;
>  
>  	num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
>  	if (ctr_base + __fls(ctr_mask) >= num_ctrs) {
> @@ -253,7 +272,22 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
>  		pmc = &kvpmu->pmc[pmc_index];
>  		if (flag & SBI_PMU_START_FLAG_SET_INIT_VALUE)
>  			pmc->counter_val = ival;
> -		if (pmc->perf_event) {
> +		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
> +			fevent_code = get_event_code(pmc->event_idx);
> +			if (fevent_code >= SBI_PMU_FW_MAX) {
> +				sbiret = SBI_ERR_INVALID_PARAM;
> +				goto out;
> +			}
> +
> +			/* Check if the counter was already started for some reason */
> +			if (kvpmu->fw_event[fevent_code].started) {
> +				sbiret = SBI_ERR_ALREADY_STARTED;
> +				continue;
> +			}
> +
> +			kvpmu->fw_event[fevent_code].started = true;
> +			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
> +		} else if (pmc->perf_event) {
>  			if (unlikely(pmc->started)) {
>  				sbiret = SBI_ERR_ALREADY_STARTED;
>  				continue;
> @@ -281,6 +315,7 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
>  	int i, num_ctrs, pmc_index, sbiret = 0;
>  	u64 enabled, running;
>  	struct kvm_pmc *pmc;
> +	int fevent_code;
>  
>  	num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
>  	if ((ctr_base + __fls(ctr_mask)) >= num_ctrs) {
> @@ -294,7 +329,18 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
>  		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
>  			continue;
>  		pmc = &kvpmu->pmc[pmc_index];
> -		if (pmc->perf_event) {
> +		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
> +			fevent_code = get_event_code(pmc->event_idx);
> +			if (fevent_code >= SBI_PMU_FW_MAX) {
> +				sbiret = SBI_ERR_INVALID_PARAM;
> +				goto out;
> +			}
> +
> +			if (!kvpmu->fw_event[fevent_code].started)
> +				sbiret = SBI_ERR_ALREADY_STOPPED;
> +
> +			kvpmu->fw_event[fevent_code].started = false;
> +		} else if (pmc->perf_event) {
>  			if (pmc->started) {
>  				/* Stop counting the counter */
>  				perf_event_disable(pmc->perf_event);
> @@ -307,12 +353,15 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
>  				pmc->counter_val += perf_event_read_value(pmc->perf_event,
>  									  &enabled, &running);
>  				pmu_release_perf_event(pmc);
> -				clear_bit(pmc_index, kvpmu->pmc_in_use);
>  			}
>  		} else {
>  			kvm_debug("Can not stop counter due to invalid confiugartion\n");
>  			sbiret = SBI_ERR_INVALID_PARAM;
>  		}
> +		if (flag & SBI_PMU_STOP_FLAG_RESET) {
> +			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
> +			clear_bit(pmc_index, kvpmu->pmc_in_use);

nit: I'd probably just leave clear_bit where it was and add

if (flag & SBI_PMU_STOP_FLAG_RESET)
   pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;

to the firmware arm.

> +		}
>  	}
>  
>  out:
> @@ -329,12 +378,12 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
>  	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
>  	struct perf_event *event;
>  	struct perf_event_attr attr;
> -	int num_ctrs, ctr_idx;
> +	int num_ctrs, ctr_idx, sbiret = 0;
>  	u32 etype = pmu_get_perf_event_type(eidx);
>  	u64 config;
> -	struct kvm_pmc *pmc;
> -	int sbiret = 0;
> -
> +	struct kvm_pmc *pmc = NULL;
> +	bool is_fevent;
> +	unsigned long event_code;
>  
>  	num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
>  	if (etype == PERF_TYPE_MAX || (ctr_base + __fls(ctr_mask) >= num_ctrs)) {
> @@ -342,7 +391,9 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
>  		goto out;
>  	}
>  
> -	if (pmu_is_fw_event(eidx)) {
> +	event_code = get_event_code(eidx);
> +	is_fevent = pmu_is_fw_event(eidx);
> +	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
>  		sbiret = SBI_ERR_NOT_SUPPORTED;
>  		goto out;
>  	}
> @@ -357,7 +408,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
>  			goto out;
>  		}
>  		ctr_idx = ctr_base;
> -		goto match_done;
> +		if (is_fevent)
> +			goto perf_event_done;
> +		else
> +			goto match_done;
>  	}
>  
>  	ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
> @@ -366,6 +420,13 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
>  		goto out;
>  	}
>  
> +	/*
> +	 * No need to create perf events for firmware events as the firmware counter
> +	 * is supposed to return the measurement of VS->HS mode invocations.
> +	 */
> +	if (is_fevent)
> +		goto perf_event_done;
> +
>  match_done:
>  	pmc = &kvpmu->pmc[ctr_idx];
>  	pmu_release_perf_event(pmc);
> @@ -404,10 +465,19 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
>  		return -EOPNOTSUPP;
>  	}
>  
> -	set_bit(ctr_idx, kvpmu->pmc_in_use);
>  	pmc->perf_event = event;
> -	if (flag & SBI_PMU_CFG_FLAG_AUTO_START)
> -		perf_event_enable(pmc->perf_event);

Maybe we can move the perf setup stuff into a helper function and
then guard it with an if-statement rather than have the gotos?

> +perf_event_done:
> +	if (flag & SBI_PMU_CFG_FLAG_AUTO_START) {
> +		if (is_fevent)
> +			kvpmu->fw_event[event_code].started = true;
> +		else
> +			perf_event_enable(pmc->perf_event);
> +	}
> +	/* This should be only true for firmware events */
> +	if (!pmc)
> +		pmc = &kvpmu->pmc[ctr_idx];
> +	pmc->event_idx = eidx;
> +	set_bit(ctr_idx, kvpmu->pmc_in_use);
>  
>  	ext_data->out_val = ctr_idx;
>  out:
> @@ -451,6 +521,7 @@ int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
>  	bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
>  	kvpmu->num_hw_ctrs = num_hw_ctrs;
>  	kvpmu->num_fw_ctrs = num_fw_ctrs;
> +	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));

I'm wondering if we need this array. We already have an underused pmc for
the fw events which has counter_val and started. Can't we just used those?

>  	/*
>  	 * There is no corelation betwen the logical hardware counter and virtual counters.
>  	 * However, we need to encode a hpmcounter CSR in the counter info field so that
> @@ -464,6 +535,7 @@ int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
>  		pmc = &kvpmu->pmc[i];
>  		pmc->idx = i;
>  		pmc->counter_val = 0;
> +		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
>  		if (i < kvpmu->num_hw_ctrs) {
>  			kvpmu->pmc[i].cinfo.type = SBI_PMU_CTR_TYPE_HW;
>  			if (i < 3)
> @@ -501,7 +573,10 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
>  	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
>  		pmc = &kvpmu->pmc[i];
>  		pmu_release_perf_event(pmc);
> +		pmc->counter_val = 0;
> +		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
>  	}
> +	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
>  }
>  
>  void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
> diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
> index d029136..3f39711 100644
> --- a/arch/riscv/kvm/vcpu_sbi_replace.c
> +++ b/arch/riscv/kvm/vcpu_sbi_replace.c
> @@ -11,6 +11,7 @@
>  #include <linux/kvm_host.h>
>  #include <asm/sbi.h>
>  #include <asm/kvm_vcpu_timer.h>
> +#include <asm/kvm_vcpu_pmu.h>
>  #include <asm/kvm_vcpu_sbi.h>
>  
>  static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> @@ -26,6 +27,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  		return 0;
>  	}
>  
> +	kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_SET_TIMER);
>  #if __riscv_xlen == 32
>  	next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
>  #else
> @@ -58,6 +60,7 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  		return 0;
>  	}
>  
> +	kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_IPI_SENT);
>  	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
>  		if (hbase != -1UL) {
>  			if (tmp->vcpu_id < hbase)
> @@ -68,6 +71,7 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  		ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
>  		if (ret < 0)
>  			break;
> +		kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RECVD);
>  	}
>  
>  	return ret;
> @@ -91,6 +95,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
>  	switch (funcid) {
>  	case SBI_EXT_RFENCE_REMOTE_FENCE_I:
>  		kvm_riscv_fence_i(vcpu->kvm, hbase, hmask);
> +		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT);
>  		break;
>  	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
>  		if (cp->a2 == 0 && cp->a3 == 0)
> @@ -98,6 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
>  		else
>  			kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
>  						  cp->a2, cp->a3, PAGE_SHIFT);
> +		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT);
>  		break;
>  	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
>  		if (cp->a2 == 0 && cp->a3 == 0)
> @@ -108,6 +114,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
>  						       hbase, hmask,
>  						       cp->a2, cp->a3,
>  						       PAGE_SHIFT, cp->a4);
> +		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_SENT);
>  		break;
>  	case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
>  	case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
> -- 
> 2.25.1
>

It think it'd be nice to break the application of
kvm_riscv_vcpu_pmu_incr_fw() out of this patch. I.e. introduce
kvm_riscv_vcpu_pmu_incr_fw() in this patch and then a second patch
applies it to all the ecalls.

Thanks,
drew
  
Atish Patra Jan. 26, 2023, 3:08 a.m. UTC | #2
On Fri, Jan 13, 2023 at 4:08 AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Thu, Dec 15, 2022 at 09:00:46AM -0800, Atish Patra wrote:
> > SBI PMU extension defines a set of firmware events which can provide
> > useful information to guests about number of SBI calls. As hypervisor
> > implements the SBI PMU extension, these firmware events corresponds
> > to ecall invocations between VS->HS mode. All other firmware events
> > will always report zero if monitored as KVM doesn't implement them.
> >
> > Signed-off-by: Atish Patra <atishp@rivosinc.com>
> > ---
> >  arch/riscv/include/asm/kvm_vcpu_pmu.h |  16 ++++
> >  arch/riscv/include/asm/sbi.h          |   2 +-
> >  arch/riscv/kvm/tlb.c                  |   6 +-
> >  arch/riscv/kvm/vcpu_pmu.c             | 105 ++++++++++++++++++++++----
> >  arch/riscv/kvm/vcpu_sbi_replace.c     |   7 ++
> >  5 files changed, 119 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
> > index 7a9a8e6..cccc6182 100644
> > --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
> > +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
> > @@ -17,6 +17,14 @@
> >  #define RISCV_KVM_MAX_FW_CTRS 32
> >  #define RISCV_MAX_COUNTERS      64
> >
> > +struct kvm_fw_event {
> > +     /* Current value of the event */
> > +     unsigned long value;
> > +
> > +     /* Event monitoring status */
> > +     bool started;
> > +};
> > +
> >  /* Per virtual pmu counter data */
> >  struct kvm_pmc {
> >       u8 idx;
> > @@ -25,11 +33,14 @@ struct kvm_pmc {
> >       union sbi_pmu_ctr_info cinfo;
> >       /* Event monitoring status */
> >       bool started;
> > +     /* Monitoring event ID */
> > +     unsigned long event_idx;
> >  };
> >
> >  /* PMU data structure per vcpu */
> >  struct kvm_pmu {
> >       struct kvm_pmc pmc[RISCV_MAX_COUNTERS];
> > +     struct kvm_fw_event fw_event[RISCV_KVM_MAX_FW_CTRS];
> >       /* Number of the virtual firmware counters available */
> >       int num_fw_ctrs;
> >       /* Number of the virtual hardware counters available */
> > @@ -52,6 +63,7 @@ struct kvm_pmu {
> >  { .base = CSR_CYCLE,      .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm },
> >  #endif
> >
> > +int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid);
> >  int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
> >                               unsigned long *val, unsigned long new_val,
> >                               unsigned long wr_mask);
> > @@ -81,6 +93,10 @@ struct kvm_pmu {
> >  #define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
> >  { .base = 0,      .count = 0, .func = NULL },
> >
> > +static inline int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
> > +{
> > +     return 0;
> > +}
> >
> >  static inline int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
> >  {
> > diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
> > index 2a0ef738..a192a95a 100644
> > --- a/arch/riscv/include/asm/sbi.h
> > +++ b/arch/riscv/include/asm/sbi.h
> > @@ -171,7 +171,7 @@ enum sbi_pmu_fw_generic_events_t {
> >       SBI_PMU_FW_IPI_SENT             = 6,
> >       SBI_PMU_FW_IPI_RECVD            = 7,
> >       SBI_PMU_FW_FENCE_I_SENT         = 8,
> > -     SBI_PMU_FW_FENCE_I_RECVD        = 9,
> > +     SBI_PMU_FW_FENCE_I_RCVD         = 9,
>
> This should probably be in its own patch.

done.

>
> >       SBI_PMU_FW_SFENCE_VMA_SENT      = 10,
> >       SBI_PMU_FW_SFENCE_VMA_RCVD      = 11,
> >       SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12,
> > diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
> > index 309d79b..de81920 100644
> > --- a/arch/riscv/kvm/tlb.c
> > +++ b/arch/riscv/kvm/tlb.c
> > @@ -181,6 +181,7 @@ void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu)
> >
> >  void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
> >  {
> > +     kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_RCVD);
> >       local_flush_icache_all();
> >  }
> >
> > @@ -264,15 +265,18 @@ void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
> >                                               d.addr, d.size, d.order);
> >                       break;
> >               case KVM_RISCV_HFENCE_VVMA_ASID_GVA:
> > +                     kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
> >                       kvm_riscv_local_hfence_vvma_asid_gva(
> >                                               READ_ONCE(v->vmid), d.asid,
> >                                               d.addr, d.size, d.order);
> >                       break;
> >               case KVM_RISCV_HFENCE_VVMA_ASID_ALL:
> > +                     kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
> >                       kvm_riscv_local_hfence_vvma_asid_all(
> >                                               READ_ONCE(v->vmid), d.asid);
> >                       break;
> >               case KVM_RISCV_HFENCE_VVMA_GVA:
> > +                     kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD);
> >                       kvm_riscv_local_hfence_vvma_gva(
> >                                               READ_ONCE(v->vmid),
> >                                               d.addr, d.size, d.order);
> > @@ -323,7 +327,7 @@ void kvm_riscv_fence_i(struct kvm *kvm,
> >                      unsigned long hbase, unsigned long hmask)
> >  {
> >       make_xfence_request(kvm, hbase, hmask, KVM_REQ_FENCE_I,
> > -                         KVM_REQ_FENCE_I, NULL);
> > +                 KVM_REQ_FENCE_I, NULL);
>
> stray change, and whitespace was correct before
>

Fixed.

> >  }
> >
> >  void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
> > diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
> > index 21c1f0f..a64a7ae 100644
> > --- a/arch/riscv/kvm/vcpu_pmu.c
> > +++ b/arch/riscv/kvm/vcpu_pmu.c
> > @@ -170,18 +170,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
> >       return pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
> >  }
> >
> > +int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
> > +{
> > +     struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
> > +     struct kvm_fw_event *fevent;
> > +
> > +     if (!kvpmu || fid >= SBI_PMU_FW_MAX)
> > +             return -EINVAL;
> > +
> > +     fevent = &kvpmu->fw_event[fid];
> > +     if (fevent->started)
> > +             fevent->value++;
> > +
> > +     return 0;
> > +}
> > +
> >  static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
> >                       unsigned long *out_val)
> >  {
> >       struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
> >       struct kvm_pmc *pmc;
> >       u64 enabled, running;
> > +     int fevent_code;
> >
> >       pmc = &kvpmu->pmc[cidx];
> > -     if (!pmc->perf_event)
> > -             return -EINVAL;
> >
> > -     pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
> > +     if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
> > +             fevent_code = get_event_code(pmc->event_idx);
> > +             pmc->counter_val = kvpmu->fw_event[fevent_code].value;
> > +     } else if (pmc->perf_event)
> > +             pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
> >       *out_val = pmc->counter_val;
> >
> >       return 0;
> > @@ -238,6 +256,7 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
> >       struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
> >       int i, num_ctrs, pmc_index, sbiret = 0;
> >       struct kvm_pmc *pmc;
> > +     int fevent_code;
> >
> >       num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
> >       if (ctr_base + __fls(ctr_mask) >= num_ctrs) {
> > @@ -253,7 +272,22 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
> >               pmc = &kvpmu->pmc[pmc_index];
> >               if (flag & SBI_PMU_START_FLAG_SET_INIT_VALUE)
> >                       pmc->counter_val = ival;
> > -             if (pmc->perf_event) {
> > +             if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
> > +                     fevent_code = get_event_code(pmc->event_idx);
> > +                     if (fevent_code >= SBI_PMU_FW_MAX) {
> > +                             sbiret = SBI_ERR_INVALID_PARAM;
> > +                             goto out;
> > +                     }
> > +
> > +                     /* Check if the counter was already started for some reason */
> > +                     if (kvpmu->fw_event[fevent_code].started) {
> > +                             sbiret = SBI_ERR_ALREADY_STARTED;
> > +                             continue;
> > +                     }
> > +
> > +                     kvpmu->fw_event[fevent_code].started = true;
> > +                     kvpmu->fw_event[fevent_code].value = pmc->counter_val;
> > +             } else if (pmc->perf_event) {
> >                       if (unlikely(pmc->started)) {
> >                               sbiret = SBI_ERR_ALREADY_STARTED;
> >                               continue;
> > @@ -281,6 +315,7 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
> >       int i, num_ctrs, pmc_index, sbiret = 0;
> >       u64 enabled, running;
> >       struct kvm_pmc *pmc;
> > +     int fevent_code;
> >
> >       num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
> >       if ((ctr_base + __fls(ctr_mask)) >= num_ctrs) {
> > @@ -294,7 +329,18 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
> >               if (!test_bit(pmc_index, kvpmu->pmc_in_use))
> >                       continue;
> >               pmc = &kvpmu->pmc[pmc_index];
> > -             if (pmc->perf_event) {
> > +             if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
> > +                     fevent_code = get_event_code(pmc->event_idx);
> > +                     if (fevent_code >= SBI_PMU_FW_MAX) {
> > +                             sbiret = SBI_ERR_INVALID_PARAM;
> > +                             goto out;
> > +                     }
> > +
> > +                     if (!kvpmu->fw_event[fevent_code].started)
> > +                             sbiret = SBI_ERR_ALREADY_STOPPED;
> > +
> > +                     kvpmu->fw_event[fevent_code].started = false;
> > +             } else if (pmc->perf_event) {
> >                       if (pmc->started) {
> >                               /* Stop counting the counter */
> >                               perf_event_disable(pmc->perf_event);
> > @@ -307,12 +353,15 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
> >                               pmc->counter_val += perf_event_read_value(pmc->perf_event,
> >                                                                         &enabled, &running);
> >                               pmu_release_perf_event(pmc);
> > -                             clear_bit(pmc_index, kvpmu->pmc_in_use);
> >                       }
> >               } else {
> >                       kvm_debug("Can not stop counter due to invalid confiugartion\n");
> >                       sbiret = SBI_ERR_INVALID_PARAM;
> >               }
> > +             if (flag & SBI_PMU_STOP_FLAG_RESET) {
> > +                     pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
> > +                     clear_bit(pmc_index, kvpmu->pmc_in_use);
>
> nit: I'd probably just leave clear_bit where it was and add
>
> if (flag & SBI_PMU_STOP_FLAG_RESET)
>    pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
>
> to the firmware arm.
>

We have to do clear_bit for the firmware as well. That's why I moved it below
to avoid the same code twice.

> > +             }
> >       }
> >
> >  out:
> > @@ -329,12 +378,12 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
> >       struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
> >       struct perf_event *event;
> >       struct perf_event_attr attr;
> > -     int num_ctrs, ctr_idx;
> > +     int num_ctrs, ctr_idx, sbiret = 0;
> >       u32 etype = pmu_get_perf_event_type(eidx);
> >       u64 config;
> > -     struct kvm_pmc *pmc;
> > -     int sbiret = 0;
> > -
> > +     struct kvm_pmc *pmc = NULL;
> > +     bool is_fevent;
> > +     unsigned long event_code;
> >
> >       num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
> >       if (etype == PERF_TYPE_MAX || (ctr_base + __fls(ctr_mask) >= num_ctrs)) {
> > @@ -342,7 +391,9 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
> >               goto out;
> >       }
> >
> > -     if (pmu_is_fw_event(eidx)) {
> > +     event_code = get_event_code(eidx);
> > +     is_fevent = pmu_is_fw_event(eidx);
> > +     if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
> >               sbiret = SBI_ERR_NOT_SUPPORTED;
> >               goto out;
> >       }
> > @@ -357,7 +408,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
> >                       goto out;
> >               }
> >               ctr_idx = ctr_base;
> > -             goto match_done;
> > +             if (is_fevent)
> > +                     goto perf_event_done;
> > +             else
> > +                     goto match_done;
> >       }
> >
> >       ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
> > @@ -366,6 +420,13 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
> >               goto out;
> >       }
> >
> > +     /*
> > +      * No need to create perf events for firmware events as the firmware counter
> > +      * is supposed to return the measurement of VS->HS mode invocations.
> > +      */
> > +     if (is_fevent)
> > +             goto perf_event_done;
> > +
> >  match_done:
> >       pmc = &kvpmu->pmc[ctr_idx];
> >       pmu_release_perf_event(pmc);
> > @@ -404,10 +465,19 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
> >               return -EOPNOTSUPP;
> >       }
> >
> > -     set_bit(ctr_idx, kvpmu->pmc_in_use);
> >       pmc->perf_event = event;
> > -     if (flag & SBI_PMU_CFG_FLAG_AUTO_START)
> > -             perf_event_enable(pmc->perf_event);
>
> Maybe we can move the perf setup stuff into a helper function and
> then guard it with an if-statement rather than have the gotos?
>

Sure. Done.

> > +perf_event_done:
> > +     if (flag & SBI_PMU_CFG_FLAG_AUTO_START) {
> > +             if (is_fevent)
> > +                     kvpmu->fw_event[event_code].started = true;
> > +             else
> > +                     perf_event_enable(pmc->perf_event);
> > +     }
> > +     /* This should be only true for firmware events */
> > +     if (!pmc)
> > +             pmc = &kvpmu->pmc[ctr_idx];
> > +     pmc->event_idx = eidx;
> > +     set_bit(ctr_idx, kvpmu->pmc_in_use);
> >
> >       ext_data->out_val = ctr_idx;
> >  out:
> > @@ -451,6 +521,7 @@ int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
> >       bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
> >       kvpmu->num_hw_ctrs = num_hw_ctrs;
> >       kvpmu->num_fw_ctrs = num_fw_ctrs;
> > +     memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
>
> I'm wondering if we need this array. We already have an underused pmc for
> the fw events which has counter_val and started. Can't we just used those?
>

My initial implementation was doing the same. However, pmc array is
counter index based
while fw_event is firmware event id based.
We need to maintain a mapping between these two or iterate the pmc
array one by one to find
correct entry in kvm_riscv_vcpu_pmu_incr_fw.

As the kvm_riscv_vcpu_pmu_incr_fw may be called from a performance
sensitive path, a separate
fw_event array helps quick lookup.

> >       /*
> >        * There is no corelation betwen the logical hardware counter and virtual counters.
> >        * However, we need to encode a hpmcounter CSR in the counter info field so that
> > @@ -464,6 +535,7 @@ int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
> >               pmc = &kvpmu->pmc[i];
> >               pmc->idx = i;
> >               pmc->counter_val = 0;
> > +             pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
> >               if (i < kvpmu->num_hw_ctrs) {
> >                       kvpmu->pmc[i].cinfo.type = SBI_PMU_CTR_TYPE_HW;
> >                       if (i < 3)
> > @@ -501,7 +573,10 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
> >       for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
> >               pmc = &kvpmu->pmc[i];
> >               pmu_release_perf_event(pmc);
> > +             pmc->counter_val = 0;
> > +             pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
> >       }
> > +     memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
> >  }
> >
> >  void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
> > diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
> > index d029136..3f39711 100644
> > --- a/arch/riscv/kvm/vcpu_sbi_replace.c
> > +++ b/arch/riscv/kvm/vcpu_sbi_replace.c
> > @@ -11,6 +11,7 @@
> >  #include <linux/kvm_host.h>
> >  #include <asm/sbi.h>
> >  #include <asm/kvm_vcpu_timer.h>
> > +#include <asm/kvm_vcpu_pmu.h>
> >  #include <asm/kvm_vcpu_sbi.h>
> >
> >  static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> > @@ -26,6 +27,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> >               return 0;
> >       }
> >
> > +     kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_SET_TIMER);
> >  #if __riscv_xlen == 32
> >       next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
> >  #else
> > @@ -58,6 +60,7 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> >               return 0;
> >       }
> >
> > +     kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_IPI_SENT);
> >       kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
> >               if (hbase != -1UL) {
> >                       if (tmp->vcpu_id < hbase)
> > @@ -68,6 +71,7 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> >               ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
> >               if (ret < 0)
> >                       break;
> > +             kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RECVD);
> >       }
> >
> >       return ret;
> > @@ -91,6 +95,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
> >       switch (funcid) {
> >       case SBI_EXT_RFENCE_REMOTE_FENCE_I:
> >               kvm_riscv_fence_i(vcpu->kvm, hbase, hmask);
> > +             kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT);
> >               break;
> >       case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
> >               if (cp->a2 == 0 && cp->a3 == 0)
> > @@ -98,6 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
> >               else
> >                       kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
> >                                                 cp->a2, cp->a3, PAGE_SHIFT);
> > +             kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT);
> >               break;
> >       case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
> >               if (cp->a2 == 0 && cp->a3 == 0)
> > @@ -108,6 +114,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
> >                                                      hbase, hmask,
> >                                                      cp->a2, cp->a3,
> >                                                      PAGE_SHIFT, cp->a4);
> > +             kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_SENT);
> >               break;
> >       case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
> >       case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID:
> > --
> > 2.25.1
> >
>
> It think it'd be nice to break the application of
> kvm_riscv_vcpu_pmu_incr_fw() out of this patch. I.e. introduce
> kvm_riscv_vcpu_pmu_incr_fw() in this patch and then a second patch
> applies it to all the ecalls.
>

Done.

> Thanks,
> drew
  

Patch

diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h
index 7a9a8e6..cccc6182 100644
--- a/arch/riscv/include/asm/kvm_vcpu_pmu.h
+++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h
@@ -17,6 +17,14 @@ 
 #define RISCV_KVM_MAX_FW_CTRS 32
 #define RISCV_MAX_COUNTERS      64
 
+struct kvm_fw_event {
+	/* Current value of the event */
+	unsigned long value;
+
+	/* Event monitoring status */
+	bool started;
+};
+
 /* Per virtual pmu counter data */
 struct kvm_pmc {
 	u8 idx;
@@ -25,11 +33,14 @@  struct kvm_pmc {
 	union sbi_pmu_ctr_info cinfo;
 	/* Event monitoring status */
 	bool started;
+	/* Monitoring event ID */
+	unsigned long event_idx;
 };
 
 /* PMU data structure per vcpu */
 struct kvm_pmu {
 	struct kvm_pmc pmc[RISCV_MAX_COUNTERS];
+	struct kvm_fw_event fw_event[RISCV_KVM_MAX_FW_CTRS];
 	/* Number of the virtual firmware counters available */
 	int num_fw_ctrs;
 	/* Number of the virtual hardware counters available */
@@ -52,6 +63,7 @@  struct kvm_pmu {
 { .base = CSR_CYCLE,      .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm },
 #endif
 
+int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid);
 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
 				unsigned long *val, unsigned long new_val,
 				unsigned long wr_mask);
@@ -81,6 +93,10 @@  struct kvm_pmu {
 #define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \
 { .base = 0,      .count = 0, .func = NULL },
 
+static inline int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
+{
+	return 0;
+}
 
 static inline int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 2a0ef738..a192a95a 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -171,7 +171,7 @@  enum sbi_pmu_fw_generic_events_t {
 	SBI_PMU_FW_IPI_SENT		= 6,
 	SBI_PMU_FW_IPI_RECVD		= 7,
 	SBI_PMU_FW_FENCE_I_SENT		= 8,
-	SBI_PMU_FW_FENCE_I_RECVD	= 9,
+	SBI_PMU_FW_FENCE_I_RCVD		= 9,
 	SBI_PMU_FW_SFENCE_VMA_SENT	= 10,
 	SBI_PMU_FW_SFENCE_VMA_RCVD	= 11,
 	SBI_PMU_FW_SFENCE_VMA_ASID_SENT	= 12,
diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c
index 309d79b..de81920 100644
--- a/arch/riscv/kvm/tlb.c
+++ b/arch/riscv/kvm/tlb.c
@@ -181,6 +181,7 @@  void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu)
 
 void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu)
 {
+	kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_RCVD);
 	local_flush_icache_all();
 }
 
@@ -264,15 +265,18 @@  void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu)
 						d.addr, d.size, d.order);
 			break;
 		case KVM_RISCV_HFENCE_VVMA_ASID_GVA:
+			kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
 			kvm_riscv_local_hfence_vvma_asid_gva(
 						READ_ONCE(v->vmid), d.asid,
 						d.addr, d.size, d.order);
 			break;
 		case KVM_RISCV_HFENCE_VVMA_ASID_ALL:
+			kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD);
 			kvm_riscv_local_hfence_vvma_asid_all(
 						READ_ONCE(v->vmid), d.asid);
 			break;
 		case KVM_RISCV_HFENCE_VVMA_GVA:
+			kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD);
 			kvm_riscv_local_hfence_vvma_gva(
 						READ_ONCE(v->vmid),
 						d.addr, d.size, d.order);
@@ -323,7 +327,7 @@  void kvm_riscv_fence_i(struct kvm *kvm,
 		       unsigned long hbase, unsigned long hmask)
 {
 	make_xfence_request(kvm, hbase, hmask, KVM_REQ_FENCE_I,
-			    KVM_REQ_FENCE_I, NULL);
+		    KVM_REQ_FENCE_I, NULL);
 }
 
 void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm,
diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c
index 21c1f0f..a64a7ae 100644
--- a/arch/riscv/kvm/vcpu_pmu.c
+++ b/arch/riscv/kvm/vcpu_pmu.c
@@ -170,18 +170,36 @@  static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
 	return pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
 }
 
+int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
+{
+	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+	struct kvm_fw_event *fevent;
+
+	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
+		return -EINVAL;
+
+	fevent = &kvpmu->fw_event[fid];
+	if (fevent->started)
+		fevent->value++;
+
+	return 0;
+}
+
 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
 			unsigned long *out_val)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
 	u64 enabled, running;
+	int fevent_code;
 
 	pmc = &kvpmu->pmc[cidx];
-	if (!pmc->perf_event)
-		return -EINVAL;
 
-	pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
+	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
+		fevent_code = get_event_code(pmc->event_idx);
+		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
+	} else if (pmc->perf_event)
+		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
 	*out_val = pmc->counter_val;
 
 	return 0;
@@ -238,6 +256,7 @@  int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	int i, num_ctrs, pmc_index, sbiret = 0;
 	struct kvm_pmc *pmc;
+	int fevent_code;
 
 	num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
 	if (ctr_base + __fls(ctr_mask) >= num_ctrs) {
@@ -253,7 +272,22 @@  int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 		pmc = &kvpmu->pmc[pmc_index];
 		if (flag & SBI_PMU_START_FLAG_SET_INIT_VALUE)
 			pmc->counter_val = ival;
-		if (pmc->perf_event) {
+		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
+			fevent_code = get_event_code(pmc->event_idx);
+			if (fevent_code >= SBI_PMU_FW_MAX) {
+				sbiret = SBI_ERR_INVALID_PARAM;
+				goto out;
+			}
+
+			/* Check if the counter was already started for some reason */
+			if (kvpmu->fw_event[fevent_code].started) {
+				sbiret = SBI_ERR_ALREADY_STARTED;
+				continue;
+			}
+
+			kvpmu->fw_event[fevent_code].started = true;
+			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
+		} else if (pmc->perf_event) {
 			if (unlikely(pmc->started)) {
 				sbiret = SBI_ERR_ALREADY_STARTED;
 				continue;
@@ -281,6 +315,7 @@  int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 	int i, num_ctrs, pmc_index, sbiret = 0;
 	u64 enabled, running;
 	struct kvm_pmc *pmc;
+	int fevent_code;
 
 	num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
 	if ((ctr_base + __fls(ctr_mask)) >= num_ctrs) {
@@ -294,7 +329,18 @@  int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
 			continue;
 		pmc = &kvpmu->pmc[pmc_index];
-		if (pmc->perf_event) {
+		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
+			fevent_code = get_event_code(pmc->event_idx);
+			if (fevent_code >= SBI_PMU_FW_MAX) {
+				sbiret = SBI_ERR_INVALID_PARAM;
+				goto out;
+			}
+
+			if (!kvpmu->fw_event[fevent_code].started)
+				sbiret = SBI_ERR_ALREADY_STOPPED;
+
+			kvpmu->fw_event[fevent_code].started = false;
+		} else if (pmc->perf_event) {
 			if (pmc->started) {
 				/* Stop counting the counter */
 				perf_event_disable(pmc->perf_event);
@@ -307,12 +353,15 @@  int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 				pmc->counter_val += perf_event_read_value(pmc->perf_event,
 									  &enabled, &running);
 				pmu_release_perf_event(pmc);
-				clear_bit(pmc_index, kvpmu->pmc_in_use);
 			}
 		} else {
 			kvm_debug("Can not stop counter due to invalid confiugartion\n");
 			sbiret = SBI_ERR_INVALID_PARAM;
 		}
+		if (flag & SBI_PMU_STOP_FLAG_RESET) {
+			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
+			clear_bit(pmc_index, kvpmu->pmc_in_use);
+		}
 	}
 
 out:
@@ -329,12 +378,12 @@  int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	struct perf_event *event;
 	struct perf_event_attr attr;
-	int num_ctrs, ctr_idx;
+	int num_ctrs, ctr_idx, sbiret = 0;
 	u32 etype = pmu_get_perf_event_type(eidx);
 	u64 config;
-	struct kvm_pmc *pmc;
-	int sbiret = 0;
-
+	struct kvm_pmc *pmc = NULL;
+	bool is_fevent;
+	unsigned long event_code;
 
 	num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs;
 	if (etype == PERF_TYPE_MAX || (ctr_base + __fls(ctr_mask) >= num_ctrs)) {
@@ -342,7 +391,9 @@  int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
 		goto out;
 	}
 
-	if (pmu_is_fw_event(eidx)) {
+	event_code = get_event_code(eidx);
+	is_fevent = pmu_is_fw_event(eidx);
+	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
 		sbiret = SBI_ERR_NOT_SUPPORTED;
 		goto out;
 	}
@@ -357,7 +408,10 @@  int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
 			goto out;
 		}
 		ctr_idx = ctr_base;
-		goto match_done;
+		if (is_fevent)
+			goto perf_event_done;
+		else
+			goto match_done;
 	}
 
 	ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
@@ -366,6 +420,13 @@  int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
 		goto out;
 	}
 
+	/*
+	 * No need to create perf events for firmware events as the firmware counter
+	 * is supposed to return the measurement of VS->HS mode invocations.
+	 */
+	if (is_fevent)
+		goto perf_event_done;
+
 match_done:
 	pmc = &kvpmu->pmc[ctr_idx];
 	pmu_release_perf_event(pmc);
@@ -404,10 +465,19 @@  int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba
 		return -EOPNOTSUPP;
 	}
 
-	set_bit(ctr_idx, kvpmu->pmc_in_use);
 	pmc->perf_event = event;
-	if (flag & SBI_PMU_CFG_FLAG_AUTO_START)
-		perf_event_enable(pmc->perf_event);
+perf_event_done:
+	if (flag & SBI_PMU_CFG_FLAG_AUTO_START) {
+		if (is_fevent)
+			kvpmu->fw_event[event_code].started = true;
+		else
+			perf_event_enable(pmc->perf_event);
+	}
+	/* This should be only true for firmware events */
+	if (!pmc)
+		pmc = &kvpmu->pmc[ctr_idx];
+	pmc->event_idx = eidx;
+	set_bit(ctr_idx, kvpmu->pmc_in_use);
 
 	ext_data->out_val = ctr_idx;
 out:
@@ -451,6 +521,7 @@  int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
 	bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
 	kvpmu->num_hw_ctrs = num_hw_ctrs;
 	kvpmu->num_fw_ctrs = num_fw_ctrs;
+	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
 	/*
 	 * There is no corelation betwen the logical hardware counter and virtual counters.
 	 * However, we need to encode a hpmcounter CSR in the counter info field so that
@@ -464,6 +535,7 @@  int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
 		pmc = &kvpmu->pmc[i];
 		pmc->idx = i;
 		pmc->counter_val = 0;
+		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
 		if (i < kvpmu->num_hw_ctrs) {
 			kvpmu->pmc[i].cinfo.type = SBI_PMU_CTR_TYPE_HW;
 			if (i < 3)
@@ -501,7 +573,10 @@  void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
 	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
 		pmc = &kvpmu->pmc[i];
 		pmu_release_perf_event(pmc);
+		pmc->counter_val = 0;
+		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
 	}
+	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
 }
 
 void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c
index d029136..3f39711 100644
--- a/arch/riscv/kvm/vcpu_sbi_replace.c
+++ b/arch/riscv/kvm/vcpu_sbi_replace.c
@@ -11,6 +11,7 @@ 
 #include <linux/kvm_host.h>
 #include <asm/sbi.h>
 #include <asm/kvm_vcpu_timer.h>
+#include <asm/kvm_vcpu_pmu.h>
 #include <asm/kvm_vcpu_sbi.h>
 
 static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
@@ -26,6 +27,7 @@  static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		return 0;
 	}
 
+	kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_SET_TIMER);
 #if __riscv_xlen == 32
 	next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0;
 #else
@@ -58,6 +60,7 @@  static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		return 0;
 	}
 
+	kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_IPI_SENT);
 	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
 		if (hbase != -1UL) {
 			if (tmp->vcpu_id < hbase)
@@ -68,6 +71,7 @@  static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT);
 		if (ret < 0)
 			break;
+		kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RECVD);
 	}
 
 	return ret;
@@ -91,6 +95,7 @@  static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
 	switch (funcid) {
 	case SBI_EXT_RFENCE_REMOTE_FENCE_I:
 		kvm_riscv_fence_i(vcpu->kvm, hbase, hmask);
+		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT);
 		break;
 	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA:
 		if (cp->a2 == 0 && cp->a3 == 0)
@@ -98,6 +103,7 @@  static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
 		else
 			kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask,
 						  cp->a2, cp->a3, PAGE_SHIFT);
+		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT);
 		break;
 	case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID:
 		if (cp->a2 == 0 && cp->a3 == 0)
@@ -108,6 +114,7 @@  static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run
 						       hbase, hmask,
 						       cp->a2, cp->a3,
 						       PAGE_SHIFT, cp->a4);
+		kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_SENT);
 		break;
 	case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:
 	case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: