Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
[cascardo/linux.git] / arch / x86 / kvm / vmx.c
index b2f5591..bc354f0 100644 (file)
@@ -2157,7 +2157,8 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
        unsigned int dest;
 
        if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP))
+               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+               !kvm_vcpu_apicv_active(vcpu))
                return;
 
        do {
@@ -2269,7 +2270,8 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
        if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP))
+               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+               !kvm_vcpu_apicv_active(vcpu))
                return;
 
        /* Set SN when the vCPU is preempted */
@@ -2794,8 +2796,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                SECONDARY_EXEC_WBINVD_EXITING |
-               SECONDARY_EXEC_XSAVES |
-               SECONDARY_EXEC_PCOMMIT;
+               SECONDARY_EXEC_XSAVES;
 
        if (enable_ept) {
                /* nested EPT: emulate EPT also to L1 */
@@ -3381,7 +3382,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                        SECONDARY_EXEC_SHADOW_VMCS |
                        SECONDARY_EXEC_XSAVES |
                        SECONDARY_EXEC_ENABLE_PML |
-                       SECONDARY_EXEC_PCOMMIT |
                        SECONDARY_EXEC_TSC_SCALING;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
@@ -4973,9 +4973,6 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
        if (!enable_pml)
                exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
-       /* Currently, we allow L1 guest to directly run pcommit instruction. */
-       exec_control &= ~SECONDARY_EXEC_PCOMMIT;
-
        return exec_control;
 }
 
@@ -5020,9 +5017,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
 
-       if (cpu_has_secondary_exec_ctrls())
+       if (cpu_has_secondary_exec_ctrls()) {
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
                                vmx_secondary_exec_control(vmx));
+       }
 
        if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
                vmcs_write64(EOI_EXIT_BITMAP0, 0);
@@ -5095,6 +5093,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
        if (vmx_xsaves_supported())
                vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
 
+       if (enable_pml) {
+               ASSERT(vmx->pml_pg);
+               vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
+               vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+       }
+
        return 0;
 }
 
@@ -6801,7 +6805,13 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
 
        /* Checks for #GP/#SS exceptions. */
        exn = false;
-       if (is_protmode(vcpu)) {
+       if (is_long_mode(vcpu)) {
+               /* Long mode: #GP(0)/#SS(0) if the memory address is in a
+                * non-canonical form. This is the only check on the memory
+                * destination for long mode!
+                */
+               exn = is_noncanonical_address(*ret);
+       } else if (is_protmode(vcpu)) {
                /* Protected mode: apply checks for segment validity in the
                 * following order:
                 * - segment type check (#GP(0) may be thrown)
@@ -6818,17 +6828,10 @@ static int get_vmx_mem_address(struct kvm_vcpu *vcpu,
                         * execute-only code segment
                         */
                        exn = ((s.type & 0xa) == 8);
-       }
-       if (exn) {
-               kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
-               return 1;
-       }
-       if (is_long_mode(vcpu)) {
-               /* Long mode: #GP(0)/#SS(0) if the memory address is in a
-                * non-canonical form. This is an only check for long mode.
-                */
-               exn = is_noncanonical_address(*ret);
-       } else if (is_protmode(vcpu)) {
+               if (exn) {
+                       kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+                       return 1;
+               }
                /* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
                 */
                exn = (s.unusable != 0);
@@ -7708,13 +7711,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static int handle_pcommit(struct kvm_vcpu *vcpu)
-{
-       /* we never catch pcommit instruct for L1 guest. */
-       WARN_ON(1);
-       return 1;
-}
-
 static int handle_preemption_timer(struct kvm_vcpu *vcpu)
 {
        kvm_lapic_expired_hv_timer(vcpu);
@@ -7771,7 +7767,6 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_XSAVES]                  = handle_xsaves,
        [EXIT_REASON_XRSTORS]                 = handle_xrstors,
        [EXIT_REASON_PML_FULL]                = handle_pml_full,
-       [EXIT_REASON_PCOMMIT]                 = handle_pcommit,
        [EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
 };
 
@@ -8081,8 +8076,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * the XSS exit bitmap in vmcs12.
                 */
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
-       case EXIT_REASON_PCOMMIT:
-               return nested_cpu_has2(vmcs12, SECONDARY_EXEC_PCOMMIT);
        case EXIT_REASON_PREEMPTION_TIMER:
                return false;
        default:
@@ -8096,22 +8089,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
        *info2 = vmcs_read32(VM_EXIT_INTR_INFO);
 }
 
-static int vmx_create_pml_buffer(struct vcpu_vmx *vmx)
-{
-       struct page *pml_pg;
-
-       pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
-       if (!pml_pg)
-               return -ENOMEM;
-
-       vmx->pml_pg = pml_pg;
-
-       vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
-       vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
-
-       return 0;
-}
-
 static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx)
 {
        if (vmx->pml_pg) {
@@ -8383,6 +8360,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
        if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
                        (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
                        exit_reason != EXIT_REASON_EPT_VIOLATION &&
+                       exit_reason != EXIT_REASON_PML_FULL &&
                        exit_reason != EXIT_REASON_TASK_SWITCH)) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
@@ -9033,6 +9011,22 @@ static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
        put_cpu();
 }
 
+/*
+ * Load the vcpu and switch to vmcs01 so that it is the current vmcs of the
+ * logical processor when free_nested() runs; BUGs if vcpu_load() returns != 0.
+ */
+static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
+{
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int r;
+
+       r = vcpu_load(vcpu);
+       BUG_ON(r);
+       vmx_load_vmcs01(vcpu);
+       free_nested(vmx);
+       vcpu_put(vcpu);
+}
+
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -9041,8 +9035,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
                vmx_destroy_pml_buffer(vmx);
        free_vpid(vmx->vpid);
        leave_guest_mode(vcpu);
-       vmx_load_vmcs01(vcpu);
-       free_nested(vmx);
+       vmx_free_vcpu_nested(vcpu);
        free_loaded_vmcs(vmx->loaded_vmcs);
        kfree(vmx->guest_msrs);
        kvm_vcpu_uninit(vcpu);
@@ -9064,14 +9057,26 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        if (err)
                goto free_vcpu;
 
+       err = -ENOMEM;
+
+       /*
+        * If PML is turned on, failure on enabling PML just results in failure
+        * of creating the vcpu, therefore we can simplify PML logic (by
+        * avoiding dealing with cases, such as enabling PML partially on vcpus
+        * for the guest, etc.).
+        */
+       if (enable_pml) {
+               vmx->pml_pg = alloc_page(GFP_KERNEL | __GFP_ZERO);
+               if (!vmx->pml_pg)
+                       goto uninit_vcpu;
+       }
+
        vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
        BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
                     > PAGE_SIZE);
 
-       err = -ENOMEM;
-       if (!vmx->guest_msrs) {
-               goto uninit_vcpu;
-       }
+       if (!vmx->guest_msrs)
+               goto free_pml;
 
        vmx->loaded_vmcs = &vmx->vmcs01;
        vmx->loaded_vmcs->vmcs = alloc_vmcs();
@@ -9115,18 +9120,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
        vmx->nested.current_vmptr = -1ull;
        vmx->nested.current_vmcs12 = NULL;
 
-       /*
-        * If PML is turned on, failure on enabling PML just results in failure
-        * of creating the vcpu, therefore we can simplify PML logic (by
-        * avoiding dealing with cases, such as enabling PML partially on vcpus
-        * for the guest, etc.
-        */
-       if (enable_pml) {
-               err = vmx_create_pml_buffer(vmx);
-               if (err)
-                       goto free_vmcs;
-       }
-
        vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
 
        return &vmx->vcpu;
@@ -9136,6 +9129,8 @@ free_vmcs:
        free_loaded_vmcs(vmx->loaded_vmcs);
 free_msrs:
        kfree(vmx->guest_msrs);
+free_pml:
+       vmx_destroy_pml_buffer(vmx);
 uninit_vcpu:
        kvm_vcpu_uninit(&vmx->vcpu);
 free_vcpu:
@@ -9268,15 +9263,6 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
        if (cpu_has_secondary_exec_ctrls())
                vmcs_set_secondary_exec_control(secondary_exec_ctl);
 
-       if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
-               if (guest_cpuid_has_pcommit(vcpu))
-                       vmx->nested.nested_vmx_secondary_ctls_high |=
-                               SECONDARY_EXEC_PCOMMIT;
-               else
-                       vmx->nested.nested_vmx_secondary_ctls_high &=
-                               ~SECONDARY_EXEC_PCOMMIT;
-       }
-
        if (nested_vmx_allowed(vcpu))
                to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
                        FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
@@ -9900,8 +9886,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
                                  SECONDARY_EXEC_RDTSCP |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                                 SECONDARY_EXEC_APIC_REGISTER_VIRT |
-                                 SECONDARY_EXEC_PCOMMIT);
+                                 SECONDARY_EXEC_APIC_REGISTER_VIRT);
                if (nested_cpu_has(vmcs12,
                                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
                        exec_control |= vmcs12->secondary_vm_exec_control;
@@ -10972,7 +10957,8 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
        struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 
        if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP))
+               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+               !kvm_vcpu_apicv_active(vcpu))
                return 0;
 
        vcpu->pre_pcpu = vcpu->cpu;
@@ -11049,7 +11035,8 @@ static void pi_post_block(struct kvm_vcpu *vcpu)
        unsigned long flags;
 
        if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP))
+               !irq_remapping_cap(IRQ_POSTING_CAP)  ||
+               !kvm_vcpu_apicv_active(vcpu))
                return;
 
        do {
@@ -11110,7 +11097,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
        int idx, ret = -EINVAL;
 
        if (!kvm_arch_has_assigned_device(kvm) ||
-               !irq_remapping_cap(IRQ_POSTING_CAP))
+               !irq_remapping_cap(IRQ_POSTING_CAP) ||
+               !kvm_vcpu_apicv_active(kvm->vcpus[0]))
                return 0;
 
        idx = srcu_read_lock(&kvm->irq_srcu);