svm: Add VMEXIT handlers for AVIC
authorSuravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Wed, 4 May 2016 19:09:48 +0000 (14:09 -0500)
committerPaolo Bonzini <pbonzini@redhat.com>
Wed, 18 May 2016 16:04:29 +0000 (18:04 +0200)
This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception()
and avic_unaccelerated_access_interception() along with two trace points
(trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access).

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/kvm_host.h
arch/x86/include/uapi/asm/svm.h
arch/x86/kvm/lapic.h
arch/x86/kvm/svm.c
arch/x86/kvm/trace.h
arch/x86/kvm/x86.c

index 337e13b..7aaa108 100644 (file)
@@ -775,6 +775,7 @@ struct kvm_arch {
        bool disabled_lapic_found;
 
        /* Struct members for AVIC */
+       u32 ldr_mode;
        struct page *avic_logical_id_table_page;
        struct page *avic_physical_id_table_page;
 };
index 8a4add8..b9e9bb2 100644 (file)
@@ -73,6 +73,8 @@
 #define SVM_EXIT_MWAIT_COND    0x08c
 #define SVM_EXIT_XSETBV        0x08d
 #define SVM_EXIT_NPF           0x400
+#define SVM_EXIT_AVIC_INCOMPLETE_IPI           0x401
+#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS     0x402
 
 #define SVM_EXIT_ERR           -1
 
        { SVM_EXIT_SMI,         "smi" }, \
        { SVM_EXIT_INIT,        "init" }, \
        { SVM_EXIT_VINTR,       "vintr" }, \
+       { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
        { SVM_EXIT_CPUID,       "cpuid" }, \
        { SVM_EXIT_INVD,        "invd" }, \
+       { SVM_EXIT_PAUSE,       "pause" }, \
        { SVM_EXIT_HLT,         "hlt" }, \
        { SVM_EXIT_INVLPG,      "invlpg" }, \
        { SVM_EXIT_INVLPGA,     "invlpga" }, \
        { SVM_EXIT_MONITOR,     "monitor" }, \
        { SVM_EXIT_MWAIT,       "mwait" }, \
        { SVM_EXIT_XSETBV,      "xsetbv" }, \
-       { SVM_EXIT_NPF,         "npf" }
+       { SVM_EXIT_NPF,         "npf" }, \
+       { SVM_EXIT_RSM,         "rsm" }, \
+       { SVM_EXIT_AVIC_INCOMPLETE_IPI,         "avic_incomplete_ipi" }, \
+       { SVM_EXIT_AVIC_UNACCELERATED_ACCESS,   "avic_unaccelerated_access" }
 
 
 #endif /* _UAPI__SVM_H */
index bbe5d12..891c6da 100644 (file)
@@ -9,6 +9,9 @@
 #define KVM_APIC_SIPI          1
 #define KVM_APIC_LVT_NUM       6
 
+#define KVM_APIC_SHORT_MASK    0xc0000
+#define KVM_APIC_DEST_MASK     0x800
+
 struct kvm_timer {
        struct hrtimer timer;
        s64 period;                             /* unit: ns */
index 95d9b5c..db2c140 100644 (file)
@@ -91,6 +91,10 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
  */
 #define AVIC_MAX_PHYSICAL_ID_COUNT     255
 
+#define AVIC_UNACCEL_ACCESS_WRITE_MASK         1
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK                0xFF0
+#define AVIC_UNACCEL_ACCESS_VECTOR_MASK                0xFFFFFFFF
+
 static bool erratum_383_found __read_mostly;
 
 static const u32 host_save_user_msrs[] = {
@@ -176,6 +180,7 @@ struct vcpu_svm {
        /* cached guest cpuid flags for faster access */
        bool nrips_enabled      : 1;
 
+       u32 ldr_reg;
        struct page *avic_backing_page;
        u64 *avic_physical_id_cache;
 };
@@ -3492,6 +3497,278 @@ static int mwait_interception(struct vcpu_svm *svm)
        return nop_interception(svm);
 }
 
+enum avic_ipi_failure_cause {
+       AVIC_IPI_FAILURE_INVALID_INT_TYPE,
+       AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
+       AVIC_IPI_FAILURE_INVALID_TARGET,
+       AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
+};
+
+static int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
+{
+       u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
+       u32 icrl = svm->vmcb->control.exit_info_1;
+       u32 id = svm->vmcb->control.exit_info_2 >> 32;
+       u32 index = svm->vmcb->control.exit_info_2 && 0xFF;
+       struct kvm_lapic *apic = svm->vcpu.arch.apic;
+
+       trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
+
+       switch (id) {
+       case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
+               /*
+                * AVIC hardware handles the generation of
+                * IPIs when the specified Message Type is Fixed
+                * (also known as fixed delivery mode) and
+                * the Trigger Mode is edge-triggered. The hardware
+                * also supports self and broadcast delivery modes
+                * specified via the Destination Shorthand(DSH)
+                * field of the ICRL. Logical and physical APIC ID
+                * formats are supported. All other IPI types cause
+                * a #VMEXIT, which needs to emulated.
+                */
+               kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
+               kvm_lapic_reg_write(apic, APIC_ICR, icrl);
+               break;
+       case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: {
+               int i;
+               struct kvm_vcpu *vcpu;
+               struct kvm *kvm = svm->vcpu.kvm;
+               struct kvm_lapic *apic = svm->vcpu.arch.apic;
+
+               /*
+                * At this point, we expect that the AVIC HW has already
+                * set the appropriate IRR bits on the valid target
+                * vcpus. So, we just need to kick the appropriate vcpu.
+                */
+               kvm_for_each_vcpu(i, vcpu, kvm) {
+                       bool m = kvm_apic_match_dest(vcpu, apic,
+                                                    icrl & KVM_APIC_SHORT_MASK,
+                                                    GET_APIC_DEST_FIELD(icrh),
+                                                    icrl & KVM_APIC_DEST_MASK);
+
+                       if (m && !avic_vcpu_is_running(vcpu))
+                               kvm_vcpu_wake_up(vcpu);
+               }
+               break;
+       }
+       case AVIC_IPI_FAILURE_INVALID_TARGET:
+               break;
+       case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
+               WARN_ONCE(1, "Invalid backing page\n");
+               break;
+       default:
+               pr_err("Unknown IPI interception\n");
+       }
+
+       return 1;
+}
+
+static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
+{
+       struct kvm_arch *vm_data = &vcpu->kvm->arch;
+       int index;
+       u32 *logical_apic_id_table;
+       int dlid = GET_APIC_LOGICAL_ID(ldr);
+
+       if (!dlid)
+               return NULL;
+
+       if (flat) { /* flat */
+               index = ffs(dlid) - 1;
+               if (index > 7)
+                       return NULL;
+       } else { /* cluster */
+               int cluster = (dlid & 0xf0) >> 4;
+               int apic = ffs(dlid & 0x0f) - 1;
+
+               if ((apic < 0) || (apic > 7) ||
+                   (cluster >= 0xf))
+                       return NULL;
+               index = (cluster << 2) + apic;
+       }
+
+       logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page);
+
+       return &logical_apic_id_table[index];
+}
+
+static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr,
+                         bool valid)
+{
+       bool flat;
+       u32 *entry, new_entry;
+
+       flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
+       entry = avic_get_logical_id_entry(vcpu, ldr, flat);
+       if (!entry)
+               return -EINVAL;
+
+       new_entry = READ_ONCE(*entry);
+       new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+       new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
+       if (valid)
+               new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+       else
+               new_entry &= ~AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
+       WRITE_ONCE(*entry, new_entry);
+
+       return 0;
+}
+
+static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
+{
+       int ret;
+       struct vcpu_svm *svm = to_svm(vcpu);
+       u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
+
+       if (!ldr)
+               return 1;
+
+       ret = avic_ldr_write(vcpu, vcpu->vcpu_id, ldr, true);
+       if (ret && svm->ldr_reg) {
+               avic_ldr_write(vcpu, 0, svm->ldr_reg, false);
+               svm->ldr_reg = 0;
+       } else {
+               svm->ldr_reg = ldr;
+       }
+       return ret;
+}
+
+static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
+{
+       u64 *old, *new;
+       struct vcpu_svm *svm = to_svm(vcpu);
+       u32 apic_id_reg = kvm_lapic_get_reg(vcpu->arch.apic, APIC_ID);
+       u32 id = (apic_id_reg >> 24) & 0xff;
+
+       if (vcpu->vcpu_id == id)
+               return 0;
+
+       old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
+       new = avic_get_physical_id_entry(vcpu, id);
+       if (!new || !old)
+               return 1;
+
+       /* We need to move physical_id_entry to new offset */
+       *new = *old;
+       *old = 0ULL;
+       to_svm(vcpu)->avic_physical_id_cache = new;
+
+       /*
+        * Also update the guest physical APIC ID in the logical
+        * APIC ID table entry if already setup the LDR.
+        */
+       if (svm->ldr_reg)
+               avic_handle_ldr_update(vcpu);
+
+       return 0;
+}
+
+static int avic_handle_dfr_update(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       struct kvm_arch *vm_data = &vcpu->kvm->arch;
+       u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
+       u32 mod = (dfr >> 28) & 0xf;
+
+       /*
+        * We assume that all local APICs are using the same type.
+        * If this changes, we need to flush the AVIC logical
+        * APID id table.
+        */
+       if (vm_data->ldr_mode == mod)
+               return 0;
+
+       clear_page(page_address(vm_data->avic_logical_id_table_page));
+       vm_data->ldr_mode = mod;
+
+       if (svm->ldr_reg)
+               avic_handle_ldr_update(vcpu);
+       return 0;
+}
+
+static int avic_unaccel_trap_write(struct vcpu_svm *svm)
+{
+       struct kvm_lapic *apic = svm->vcpu.arch.apic;
+       u32 offset = svm->vmcb->control.exit_info_1 &
+                               AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+
+       switch (offset) {
+       case APIC_ID:
+               if (avic_handle_apic_id_update(&svm->vcpu))
+                       return 0;
+               break;
+       case APIC_LDR:
+               if (avic_handle_ldr_update(&svm->vcpu))
+                       return 0;
+               break;
+       case APIC_DFR:
+               avic_handle_dfr_update(&svm->vcpu);
+               break;
+       default:
+               break;
+       }
+
+       kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
+
+       return 1;
+}
+
+static bool is_avic_unaccelerated_access_trap(u32 offset)
+{
+       bool ret = false;
+
+       switch (offset) {
+       case APIC_ID:
+       case APIC_EOI:
+       case APIC_RRR:
+       case APIC_LDR:
+       case APIC_DFR:
+       case APIC_SPIV:
+       case APIC_ESR:
+       case APIC_ICR:
+       case APIC_LVTT:
+       case APIC_LVTTHMR:
+       case APIC_LVTPC:
+       case APIC_LVT0:
+       case APIC_LVT1:
+       case APIC_LVTERR:
+       case APIC_TMICT:
+       case APIC_TDCR:
+               ret = true;
+               break;
+       default:
+               break;
+       }
+       return ret;
+}
+
+static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
+{
+       int ret = 0;
+       u32 offset = svm->vmcb->control.exit_info_1 &
+                    AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+       u32 vector = svm->vmcb->control.exit_info_2 &
+                    AVIC_UNACCEL_ACCESS_VECTOR_MASK;
+       bool write = (svm->vmcb->control.exit_info_1 >> 32) &
+                    AVIC_UNACCEL_ACCESS_WRITE_MASK;
+       bool trap = is_avic_unaccelerated_access_trap(offset);
+
+       trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
+                                           trap, write, vector);
+       if (trap) {
+               /* Handling Trap */
+               WARN_ONCE(!write, "svm: Handling trap read.\n");
+               ret = avic_unaccel_trap_write(svm);
+       } else {
+               /* Handling Fault */
+               ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+       }
+
+       return ret;
+}
+
 static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_READ_CR0]                     = cr_interception,
        [SVM_EXIT_READ_CR3]                     = cr_interception,
@@ -3555,6 +3832,8 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
        [SVM_EXIT_NPF]                          = pf_interception,
        [SVM_EXIT_RSM]                          = emulate_on_interception,
+       [SVM_EXIT_AVIC_INCOMPLETE_IPI]          = avic_incomplete_ipi_interception,
+       [SVM_EXIT_AVIC_UNACCELERATED_ACCESS]    = avic_unaccelerated_access_interception,
 };
 
 static void dump_vmcb(struct kvm_vcpu *vcpu)
index 2f1ea2f..39f264c 100644 (file)
@@ -1292,6 +1292,63 @@ TRACE_EVENT(kvm_hv_stimer_cleanup,
                  __entry->vcpu_id, __entry->timer_index)
 );
 
+/*
+ * Tracepoint for AMD AVIC
+ */
+TRACE_EVENT(kvm_avic_incomplete_ipi,
+           TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index),
+           TP_ARGS(vcpu, icrh, icrl, id, index),
+
+       TP_STRUCT__entry(
+               __field(u32, vcpu)
+               __field(u32, icrh)
+               __field(u32, icrl)
+               __field(u32, id)
+               __field(u32, index)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu = vcpu;
+               __entry->icrh = icrh;
+               __entry->icrl = icrl;
+               __entry->id = id;
+               __entry->index = index;
+       ),
+
+       TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n",
+                 __entry->vcpu, __entry->icrh, __entry->icrl,
+                 __entry->id, __entry->index)
+);
+
+TRACE_EVENT(kvm_avic_unaccelerated_access,
+           TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec),
+           TP_ARGS(vcpu, offset, ft, rw, vec),
+
+       TP_STRUCT__entry(
+               __field(u32, vcpu)
+               __field(u32, offset)
+               __field(bool, ft)
+               __field(bool, rw)
+               __field(u32, vec)
+       ),
+
+       TP_fast_assign(
+               __entry->vcpu = vcpu;
+               __entry->offset = offset;
+               __entry->ft = ft;
+               __entry->rw = rw;
+               __entry->vec = vec;
+       ),
+
+       TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n",
+                 __entry->vcpu,
+                 __entry->offset,
+                 __print_symbolic(__entry->offset, kvm_trace_symbol_apic),
+                 __entry->ft ? "trap" : "fault",
+                 __entry->rw ? "write" : "read",
+                 __entry->vec)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH
index 7fcaed4..a8c7ca3 100644 (file)
@@ -8435,3 +8435,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_unaccelerated_access);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_avic_incomplete_ipi);