On Wed Nov 8, 2023 at 12:49 PM UTC, Alexander Graf wrote:
>
> On 08.11.23 12:18, Nicolas Saenz Julienne wrote:
> > Introduce HVCALL_TRANSLATE_VIRTUAL_ADDRESS, the hypercall receives a
> > GVA, generally from a less privileged VTL, and returns the GPA backing
> > it. The GVA -> GPA conversion is done by walking the target VTL's vCPU
> > MMU.
> >
> > NOTE: The hypercall implementation is incomplete and only shared for
> > completion. Additionally we'd like to move the VTL aware parts to
> > user-space.
>
>
> Yes, please :). We should handle the complete hypercall in user space if
> possible. If you're afraid that gva -> gpa conversion may run out of
> sync between a user space and the kvm implementations, let's introduce
> an ioctl that allows you to perform that conversion.
I'll look into introducing a generic API that performs MMU walks. The
devil is in the details, though; the hypercall introduces flags like:
• HV_TRANSLATE_GVA_TLB_FLUSH_INHIBIT: Indicates that the TlbFlushInhibit
flag in the virtual processor’s HvRegisterInterceptSuspend register
should be set as a consequence of a successful return. This prevents
other virtual processors associated with the target partition from
flushing the stage 1 TLB of the specified virtual processor until
after the TlbFlushInhibit flag is cleared.
Flags like this make things trickier.
Nicolas
@@ -2540,6 +2540,7 @@ static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
case HVCALL_GET_VP_REGISTERS:
case HVCALL_SET_VP_REGISTERS:
case HVCALL_MODIFY_VTL_PROTECTION_MASK:
+ case HVCALL_TRANSLATE_VIRTUAL_ADDRESS:
return true;
}
@@ -2556,6 +2557,96 @@ static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
kvm_fpu_put();
}
+/*
+ * Sanity-check the input of HVCALL_TRANSLATE_VIRTUAL_ADDRESS.
+ *
+ * Only requests that target the calling partition and the calling VP are
+ * accepted. Once those two checks pass, *flags receives the control flags
+ * masked with HV_XLATE_GVA_FLAGS_MASK and *vtl receives either the target
+ * VTL encoded in the top byte of the control flags or, if none was
+ * requested, the vCPU's active VTL. Flags beyond read/write/execute
+ * validation are only logged; they do not fail the call.
+ *
+ * Returns true if the request may be serviced, false if it targets another
+ * partition, another VP, or a VTL above the active one.
+ */
+static bool kvm_hv_xlate_va_validate_input(struct kvm_vcpu *vcpu,
+					   struct hv_xlate_va_input *in,
+					   u8 *vtl, u8 *flags)
+{
+	const u8 supported = HV_XLATE_GVA_VAL_READ |
+			     HV_XLATE_GVA_VAL_WRITE |
+			     HV_XLATE_GVA_VAL_EXECUTE;
+	union hv_input_vtl in_vtl;
+	bool own_vp;
+
+	if (in->partition_id != HV_PARTITION_ID_SELF)
+		return false;
+
+	own_vp = in->vp_index == HV_VP_INDEX_SELF ||
+		 in->vp_index == kvm_hv_get_vpindex(vcpu);
+	if (!own_vp)
+		return false;
+
+	/* The input VTL lives in bits 63:56 of the control flags. */
+	in_vtl.as_uint8 = in->control_flags >> 56;
+
+	*flags = in->control_flags & HV_XLATE_GVA_FLAGS_MASK;
+	if (*flags > supported)
+		pr_info_ratelimited("Translate VA control flags unsupported and will be ignored: 0x%llx\n",
+				    in->control_flags);
+
+	if (in_vtl.use_target_vtl)
+		*vtl = in_vtl.target_vtl;
+	else
+		*vtl = kvm_hv_get_active_vtl(vcpu);
+
+	/* A VTL may only be translated for itself or for a lower VTL. */
+	return *vtl <= kvm_hv_get_active_vtl(vcpu);
+}
+
+/*
+ * Translate @gva using @vcpu's walk_mmu.
+ *
+ * @flags carries HV_XLATE_GVA_VAL_* bits. Write and execute validation are
+ * mapped to PFERR_WRITE_MASK and PFERR_FETCH_MASK respectively; every other
+ * bit is ignored here. No exception structure is passed to the walker.
+ *
+ * Returns the value produced by the MMU's gva_to_gpa() callback.
+ */
+static u64 kvm_hv_xlate_va_walk(struct kvm_vcpu *vcpu, u64 gva, u8 flags)
+{
+	struct kvm_mmu *walk_mmu = vcpu->arch.walk_mmu;
+	u32 pfec = 0;
+
+	if (flags & HV_XLATE_GVA_VAL_EXECUTE)
+		pfec |= PFERR_FETCH_MASK;
+	if (flags & HV_XLATE_GVA_VAL_WRITE)
+		pfec |= PFERR_WRITE_MASK;
+
+	return walk_mmu->gva_to_gpa(vcpu, walk_mmu, gva, pfec, NULL);
+}
+
+/*
+ * Handle HVCALL_TRANSLATE_VIRTUAL_ADDRESS.
+ *
+ * Input is taken from registers for fast hypercalls (partition id from
+ * hc->ingpa, VP index from the low 32 bits of hc->outgpa, control flags and
+ * GVA from the low/high halves of XMM0) and is read from guest memory at
+ * hc->ingpa otherwise. The GVA is passed as a page number and is shifted by
+ * PAGE_SHIFT before the walk; the resulting GPA is shifted back before it is
+ * returned. Output goes to XMM1 for fast hypercalls and to guest memory at
+ * hc->outgpa otherwise.
+ *
+ * Returns HV_STATUS_SUCCESS when an output block was produced (including the
+ * HV_XLATE_GVA_UNMAPPED result), HV_STATUS_INVALID_HYPERCALL_INPUT when the
+ * input cannot be read or validated, no vCPU is found for the target VTL, or
+ * the output cannot be written.
+ */
+static u64 kvm_hv_translate_virtual_address(struct kvm_vcpu *vcpu,
+					    struct kvm_hv_hcall *hc)
+{
+	struct hv_xlate_va_output output = {};
+	struct hv_xlate_va_input input;
+	struct kvm_vcpu *target_vcpu;
+	u8 flags, target_vtl;
+
+	if (hc->fast) {
+		input.partition_id = hc->ingpa;
+		input.vp_index = hc->outgpa & 0xFFFFFFFF;
+		input.control_flags = sse128_lo(hc->xmm[0]);
+		input.gva = sse128_hi(hc->xmm[0]);
+	} else {
+		if (kvm_read_guest(vcpu->kvm, hc->ingpa, &input, sizeof(input)))
+			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+	}
+
+	trace_kvm_hv_translate_virtual_address(input.partition_id,
+					       input.vp_index,
+					       input.control_flags, input.gva);
+
+	if (!kvm_hv_xlate_va_validate_input(vcpu, &input, &target_vtl, &flags))
+		return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+	/*
+	 * The walk below dereferences target_vcpu, so do not trust the lookup
+	 * blindly. NOTE(review): kvm_hv_get_vtl_vcpu() is defined outside this
+	 * hunk; confirm whether it can return NULL and whether a different
+	 * status code would be more appropriate.
+	 */
+	target_vcpu = kvm_hv_get_vtl_vcpu(vcpu, target_vtl);
+	if (!target_vcpu)
+		return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+	output.gpa = kvm_hv_xlate_va_walk(target_vcpu, input.gva << PAGE_SHIFT,
+					  flags);
+	if (output.gpa == INVALID_GPA) {
+		output.result_code = HV_XLATE_GVA_UNMAPPED;
+	} else {
+		/* The hypercall reports a page number, not a byte address. */
+		output.gpa >>= PAGE_SHIFT;
+		output.result_code = HV_XLATE_GVA_SUCCESS;
+		output.cache_type = HV_CACHE_TYPE_X64_WB;
+	}
+
+	if (hc->fast) {
+		memcpy(&hc->xmm[1], &output, sizeof(output));
+		hc->xmm_dirty = true;
+	} else {
+		if (kvm_write_guest(vcpu->kvm, hc->outgpa, &output,
+				    sizeof(output)))
+			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+	}
+
+	return HV_STATUS_SUCCESS;
+}
+
static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
{
if (!hv_vcpu->enforce_cpuid)
@@ -2766,6 +2857,13 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
case HVCALL_VTL_CALL:
case HVCALL_VTL_RETURN:
goto hypercall_userspace_exit;
+ case HVCALL_TRANSLATE_VIRTUAL_ADDRESS:
+ if (unlikely(hc.rep_cnt)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_translate_virtual_address(vcpu, &hc);
+ break;
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
@@ -1372,6 +1372,29 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
__entry->vcpu_id, __entry->timer_index)
);
+TRACE_EVENT(kvm_hv_translate_virtual_address, /* records the input fields of the translate-VA hypercall */
+ TP_PROTO(u64 partition_id, u32 vp_index, u64 control_flags, u64 gva),
+ TP_ARGS(partition_id, vp_index, control_flags, gva),
+ /* One record field per argument, with the same names and types as in TP_PROTO. */
+ TP_STRUCT__entry(
+ __field(u64, partition_id)
+ __field(u32, vp_index)
+ __field(u64, control_flags)
+ __field(u64, gva)
+ ),
+ /* The arguments are stored in the record unmodified. */
+ TP_fast_assign(
+ __entry->partition_id = partition_id;
+ __entry->vp_index = vp_index;
+ __entry->control_flags = control_flags;
+ __entry->gva = gva;
+ ),
+ /* Every field is printed in hexadecimal. */
+ TP_printk("partition id 0x%llx, vp index 0x%x, control flags 0x%llx, gva 0x%llx",
+ __entry->partition_id, __entry->vp_index,
+ __entry->control_flags, __entry->gva)
+);
+
TRACE_EVENT(kvm_apicv_inhibit_changed,
TP_PROTO(int reason, bool set, unsigned long inhibits),
TP_ARGS(reason, set, inhibits),
@@ -163,6 +163,7 @@ union hv_reference_tsc_msr {
#define HVCALL_CREATE_VP 0x004e
#define HVCALL_GET_VP_REGISTERS 0x0050
#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_TRANSLATE_VIRTUAL_ADDRESS 0x0052
#define HVCALL_POST_MESSAGE 0x005c
#define HVCALL_SIGNAL_EVENT 0x005d
#define HVCALL_POST_DEBUG_DATA 0x0069
@@ -842,4 +843,31 @@ union hv_register_vsm_code_page_offsets {
u64 reserved:40;
} __packed;
};
+
+#define HV_XLATE_GVA_SUCCESS 0 /* result_code: the walk produced a GPA */
+#define HV_XLATE_GVA_UNMAPPED 1 /* result_code: the walk returned INVALID_GPA */
+#define HV_XLATE_GPA_UNMAPPED 4 /* not referenced by the code added in this patch */
+#define HV_CACHE_TYPE_X64_WB 6 /* cache_type value reported on a successful translation */
+ /* Bits of the translate-VA control flags; the *_VAL_* bits select the access to validate. */
+#define HV_XLATE_GVA_VAL_READ 1
+#define HV_XLATE_GVA_VAL_WRITE 2
+#define HV_XLATE_GVA_VAL_EXECUTE 4
+#define HV_XLATE_GVA_FLAGS_MASK 0x3F /* control_flags bits kept as translation flags */
+
+struct hv_xlate_va_input { /* input block of HVCALL_TRANSLATE_VIRTUAL_ADDRESS */
+ u64 partition_id; /* only HV_PARTITION_ID_SELF passes validation */
+ u32 vp_index; /* HV_VP_INDEX_SELF or the calling vCPU's own VP index */
+ u32 reserved; /* not read by the handler */
+ u64 control_flags; /* low bits: HV_XLATE_GVA_* flags; bits 63:56: input VTL */
+ u64 gva; /* shifted left by PAGE_SHIFT before the walk, i.e. a page number */
+};
+
+struct hv_xlate_va_output { /* output block of HVCALL_TRANSLATE_VIRTUAL_ADDRESS */
+ u32 result_code; /* HV_XLATE_GVA_SUCCESS or HV_XLATE_GVA_UNMAPPED in this patch */
+ u32 cache_type:8; /* HV_CACHE_TYPE_X64_WB on success, otherwise left zero */
+ u32 overlay_page:1; /* never set by the handler in this patch */
+ u32 reserved:23;
+ u64 gpa; /* on success the translated address shifted right by PAGE_SHIFT */
+};
+
#endif