kvm: mmu: lazy collapse small sptes into large sptes

author Wanpeng Li <wanpeng.li@linux.intel.com>

Fri, 3 Apr 2015 07:40:25 +0000 (15:40 +0800)

committer Paolo Bonzini <pbonzini@redhat.com>

Wed, 8 Apr 2015 08:47:04 +0000 (10:47 +0200)
author Wanpeng Li <wanpeng.li@linux.intel.com>
Fri, 3 Apr 2015 07:40:25 +0000 (15:40 +0800)
committer Paolo Bonzini <pbonzini@redhat.com>
Wed, 8 Apr 2015 08:47:04 +0000 (10:47 +0200)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index 9f1d66e..dea2e7e 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -867,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
  void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
  void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                                       struct kvm_memory_slot *memslot);
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot);
  void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot);
  void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c

index cee7592..146f295 100644 (file)
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                 kvm_flush_remote_tlbs(kvm);
  }
  
+static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+               unsigned long *rmapp)
+{
+       u64 *sptep;
+       struct rmap_iterator iter;
+       int need_tlb_flush = 0;
+       pfn_t pfn;
+       struct kvm_mmu_page *sp;
+
+       for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+               BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+               sp = page_header(__pa(sptep));
+               pfn = spte_to_pfn(*sptep);
+
+               /*
+                * Only EPT supported for now; otherwise, one would need to
+                * find out efficiently whether the guest page tables are
+                * also using huge pages.
+                */
+               if (sp->role.direct &&
+                       !kvm_is_reserved_pfn(pfn) &&
+                       PageTransCompound(pfn_to_page(pfn))) {
+                       drop_spte(kvm, sptep);
+                       sptep = rmap_get_first(*rmapp, &iter);
+                       need_tlb_flush = 1;
+               } else
+                       sptep = rmap_get_next(&iter);
+       }
+
+       return need_tlb_flush;
+}
+
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                       struct kvm_memory_slot *memslot)
+{
+       bool flush = false;
+       unsigned long *rmapp;
+       unsigned long last_index, index;
+       gfn_t gfn_start, gfn_end;
+
+       spin_lock(&kvm->mmu_lock);
+
+       gfn_start = memslot->base_gfn;
+       gfn_end = memslot->base_gfn + memslot->npages - 1;
+
+       if (gfn_start >= gfn_end)
+               goto out;
+
+       rmapp = memslot->arch.rmap[0];
+       last_index = gfn_to_index(gfn_end, memslot->base_gfn,
+                                       PT_PAGE_TABLE_LEVEL);
+
+       for (index = 0; index <= last_index; ++index, ++rmapp) {
+               if (*rmapp)
+                       flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
+
+               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                       if (flush) {
+                               kvm_flush_remote_tlbs(kvm);
+                               flush = false;
+                       }
+                       cond_resched_lock(&kvm->mmu_lock);
+               }
+       }
+
+       if (flush)
+               kvm_flush_remote_tlbs(kvm);
+
+out:
+       spin_unlock(&kvm->mmu_lock);
+}
+
  void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot)
  {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index faf044d..b8cb1d0 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7664,6 +7664,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
         /* It's OK to get 'new' slot here as it has already been installed */
         new = id_to_memslot(kvm->memslots, mem->slot);
  
+       /*
+        * Dirty logging tracks sptes in 4k granularity, meaning that large
+        * sptes have to be split.  If live migration is successful, the guest
+        * in the source machine will be destroyed and large sptes will be
+        * created in the destination. However, if the guest continues to run
+        * in the source machine (for example if live migration fails), small
+        * sptes will remain around and cause bad performance.
+        *
+        * Scan sptes if dirty logging has been stopped, dropping those
+        * which can be collapsed into a single large-page spte.  Later
+        * page faults will create the large-page sptes.
+        */
+       if ((change != KVM_MR_DELETE) &&
+               (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+               !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+               kvm_mmu_zap_collapsible_sptes(kvm, new);
+
         /*
          * Set up write protection and/or dirty logging for the new slot.
          *
author	Wanpeng Li <wanpeng.li@linux.intel.com>
	Fri, 3 Apr 2015 07:40:25 +0000 (15:40 +0800)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Wed, 8 Apr 2015 08:47:04 +0000 (10:47 +0200)
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/kvm/mmu.c		patch \| blob \| history
arch/x86/kvm/x86.c		patch \| blob \| history