diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index eea0306..57a403a 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -42,7 +42,7 @@ static unsigned long hyp_idmap_start;
 static unsigned long hyp_idmap_end;
 static phys_addr_t hyp_idmap_vector;
 
-#define pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
+#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
 
 #define kvm_pmd_huge(_x)       (pmd_huge(_x) || pmd_trans_huge(_x))
 
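The rename makes it explicit that this order only sizes the Hyp (and boot) PGD allocations; the stage-2 PGD is sized separately via S2_PGD_ORDER further down. As a rough illustration of the sizing, get_order() turns the byte size of the pgd array into a page-allocation order for __get_free_pages()/free_pages(); the numbers in the comment below are assumptions for one possible configuration, not values taken from this patch.

/*
 * Illustration only -- the concrete numbers are assumed, not from the patch:
 * with 4 KB pages, 512 pgd entries of 8 bytes each occupy exactly one page,
 * so get_order(512 * 8) == 0 and a single page is allocated and later freed.
 */
pgd_t *example_pgd;

example_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
if (example_pgd)
        free_pages((unsigned long)example_pgd, hyp_pgd_order);
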
@@ -134,7 +134,7 @@ static void unmap_ptes(struct kvm *kvm, pmd_t *pmd,
                }
        } while (pte++, addr += PAGE_SIZE, addr != end);
 
-       if (kvm_pte_table_empty(start_pte))
+       if (kvm_pte_table_empty(kvm, start_pte))
                clear_pmd_entry(kvm, pmd, start_addr);
 }
 
@@ -158,7 +158,7 @@ static void unmap_pmds(struct kvm *kvm, pud_t *pud,
                }
        } while (pmd++, addr = next, addr != end);
 
-       if (kvm_pmd_table_empty(start_pmd))
+       if (kvm_pmd_table_empty(kvm, start_pmd))
                clear_pud_entry(kvm, pud, start_addr);
 }
 
@@ -182,7 +182,7 @@ static void unmap_puds(struct kvm *kvm, pgd_t *pgd,
                }
        } while (pud++, addr = next, addr != end);
 
-       if (kvm_pud_table_empty(start_pud))
+       if (kvm_pud_table_empty(kvm, start_pud))
                clear_pgd_entry(kvm, pgd, start_addr);
 }
 
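Passing kvm through to the emptiness checks lets the per-architecture helpers distinguish stage-2 tables (non-NULL kvm) from Hyp tables (kvm == NULL, as in the unmap_range() calls below), so a table level that the architecture preallocates for stage 2 is never reported empty and therefore never freed. A minimal sketch of what such helpers could look like, assuming the KVM_PREALLOC_LEVEL convention this series introduces; kvm_page_empty() is a helper name assumed for the sketch, and the real definitions live in the per-arch kvm_mmu.h headers and may differ in detail:

/* Sketch only, not the actual arch header. */
static inline bool kvm_page_empty(void *ptr)
{
        struct page *ptr_page = virt_to_page(ptr);

        /* The table page is empty when only the initial reference remains. */
        return page_count(ptr_page) == 1;
}

#define kvm_pte_table_empty(kvm, ptep)  kvm_page_empty(ptep)

/* Never report a preallocated stage-2 level as empty, so it is kept. */
#define kvm_pmd_table_empty(kvm, pmdp)  \
        (kvm_page_empty(pmdp) && (!(kvm) || KVM_PREALLOC_LEVEL < 2))
#define kvm_pud_table_empty(kvm, pudp)  \
        (kvm_page_empty(pudp) && (!(kvm) || KVM_PREALLOC_LEVEL < 1))
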
@@ -306,7 +306,7 @@ void free_boot_hyp_pgd(void)
        if (boot_hyp_pgd) {
                unmap_range(NULL, boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
                unmap_range(NULL, boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE);
-               free_pages((unsigned long)boot_hyp_pgd, pgd_order);
+               free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
                boot_hyp_pgd = NULL;
        }
 
@@ -343,7 +343,7 @@ void free_hyp_pgds(void)
                for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE)
                        unmap_range(NULL, hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE);
 
-               free_pages((unsigned long)hyp_pgd, pgd_order);
+               free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
                hyp_pgd = NULL;
        }
 
@@ -401,13 +401,46 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
        return 0;
 }
 
+static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
+                                  unsigned long end, unsigned long pfn,
+                                  pgprot_t prot)
+{
+       pud_t *pud;
+       pmd_t *pmd;
+       unsigned long addr, next;
+       int ret;
+
+       addr = start;
+       do {
+               pud = pud_offset(pgd, addr);
+
+               if (pud_none_or_clear_bad(pud)) {
+                       pmd = pmd_alloc_one(NULL, addr);
+                       if (!pmd) {
+                               kvm_err("Cannot allocate Hyp pmd\n");
+                               return -ENOMEM;
+                       }
+                       pud_populate(NULL, pud, pmd);
+                       get_page(virt_to_page(pud));
+                       kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+               }
+
+               next = pud_addr_end(addr, end);
+               ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+               if (ret)
+                       return ret;
+               pfn += (next - addr) >> PAGE_SHIFT;
+       } while (addr = next, addr != end);
+
+       return 0;
+}
+
 static int __create_hyp_mappings(pgd_t *pgdp,
                                 unsigned long start, unsigned long end,
                                 unsigned long pfn, pgprot_t prot)
 {
        pgd_t *pgd;
        pud_t *pud;
-       pmd_t *pmd;
        unsigned long addr, next;
        int err = 0;
 
@@ -416,22 +449,21 @@ static int __create_hyp_mappings(pgd_t *pgdp,
        end = PAGE_ALIGN(end);
        do {
                pgd = pgdp + pgd_index(addr);
-               pud = pud_offset(pgd, addr);
 
-               if (pud_none_or_clear_bad(pud)) {
-                       pmd = pmd_alloc_one(NULL, addr);
-                       if (!pmd) {
-                               kvm_err("Cannot allocate Hyp pmd\n");
+               if (pgd_none(*pgd)) {
+                       pud = pud_alloc_one(NULL, addr);
+                       if (!pud) {
+                               kvm_err("Cannot allocate Hyp pud\n");
                                err = -ENOMEM;
                                goto out;
                        }
-                       pud_populate(NULL, pud, pmd);
-                       get_page(virt_to_page(pud));
-                       kvm_flush_dcache_to_poc(pud, sizeof(*pud));
+                       pgd_populate(NULL, pgd, pud);
+                       get_page(virt_to_page(pgd));
+                       kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
                }
 
                next = pgd_addr_end(addr, end);
-               err = create_hyp_pmd_mappings(pud, addr, next, pfn, prot);
+               err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot);
                if (err)
                        goto out;
                pfn += (next - addr) >> PAGE_SHIFT;
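With create_hyp_pud_mappings() in place, __create_hyp_mappings() now descends pgd -> pud -> pmd -> pte and allocates each missing intermediate table on the way down, instead of assuming that pud_offset() on the pgd entry is always valid (which only holds when the pud level is folded). A hypothetical caller, sketched under the assumption that KERN_TO_HYP and PAGE_HYP behave as elsewhere in this file, could map a single kernel page into Hyp like this; hyp_map_one_page is an invented name, not part of the patch:

/* Hypothetical helper for illustration; not part of this patch. */
static int hyp_map_one_page(void *kaddr)
{
        unsigned long kva = (unsigned long)kaddr & PAGE_MASK;
        unsigned long hyp_va = KERN_TO_HYP(kva);
        phys_addr_t pa = virt_to_phys((void *)kva);

        /* Walks pgd/pud/pmd, allocating levels as needed, then maps one pte. */
        return __create_hyp_mappings(hyp_pgd, hyp_va, hyp_va + PAGE_SIZE,
                                     __phys_to_pfn(pa), PAGE_HYP);
}
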
@@ -521,6 +553,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
+       int ret;
        pgd_t *pgd;
 
        if (kvm->arch.pgd != NULL) {
@@ -528,15 +561,38 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
                return -EINVAL;
        }
 
-       pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, S2_PGD_ORDER);
+       if (KVM_PREALLOC_LEVEL > 0) {
+               /*
+                * Allocate fake pgd for the page table manipulation macros to
+                * work.  This is not used by the hardware and we have no
+                * alignment requirement for this allocation.
+                */
+               pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
+                                      GFP_KERNEL | __GFP_ZERO);
+       } else {
+               /*
+                * Allocate actual first-level Stage-2 page table used by the
+                * hardware for Stage-2 page table walks.
+                */
+               pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+       }
+
        if (!pgd)
                return -ENOMEM;
 
-       memset(pgd, 0, PTRS_PER_S2_PGD * sizeof(pgd_t));
+       ret = kvm_prealloc_hwpgd(kvm, pgd);
+       if (ret)
+               goto out_err;
+
        kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;
-
        return 0;
+out_err:
+       if (KVM_PREALLOC_LEVEL > 0)
+               kfree(pgd);
+       else
+               free_pages((unsigned long)pgd, S2_PGD_ORDER);
+       return ret;
 }
 
 /**
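The split between the page-table-macro view (a plain kmalloc'd "fake" pgd when KVM_PREALLOC_LEVEL > 0) and the table the hardware actually walks (hidden behind kvm_prealloc_hwpgd()/kvm_free_hwpgd()) is what allows a host page-table layout with more levels than the stage-2 translation uses. A rough sketch of the contract the two arch hooks are expected to fulfil, assuming the KVM_PREALLOC_LEVEL convention; the real implementations are in the per-arch kvm_mmu.h headers and may differ:

/* Sketch of the expected contract, not the actual arch implementation. */
static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
{
        /*
         * KVM_PREALLOC_LEVEL == 0: @pgd is already the hardware table,
         * nothing to allocate.  Otherwise: allocate the real first-level
         * stage-2 table and populate every entry of the fake @pgd with it,
         * so the walkers above always find the preallocated level present.
         */
        return 0;       /* or -ENOMEM if the hardware table cannot be allocated */
}

static inline void kvm_free_hwpgd(struct kvm *kvm)
{
        /* Release whatever kvm_prealloc_hwpgd() set up, if anything. */
}
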
@@ -572,19 +628,39 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
                return;
 
        unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-       free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+       kvm_free_hwpgd(kvm);
+       if (KVM_PREALLOC_LEVEL > 0)
+               kfree(kvm->arch.pgd);
+       else
+               free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
        kvm->arch.pgd = NULL;
 }
 
-static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
                             phys_addr_t addr)
 {
        pgd_t *pgd;
        pud_t *pud;
-       pmd_t *pmd;
 
        pgd = kvm->arch.pgd + pgd_index(addr);
-       pud = pud_offset(pgd, addr);
+       if (WARN_ON(pgd_none(*pgd))) {
+               if (!cache)
+                       return NULL;
+               pud = mmu_memory_cache_alloc(cache);
+               pgd_populate(NULL, pgd, pud);
+               get_page(virt_to_page(pgd));
+       }
+
+       return pud_offset(pgd, addr);
+}
+
+static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
+                            phys_addr_t addr)
+{
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pud = stage2_get_pud(kvm, cache, addr);
        if (pud_none(*pud)) {
                if (!cache)
                        return NULL;
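For reference, the tail of stage2_get_pmd() falls outside the context lines shown above; a sketch of how the complete function should read after this hunk, with the tail reconstructed on the assumption that it keeps the pre-existing allocate-and-populate shape:

/* Sketch of the whole function after this change (tail reconstructed). */
static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
                             phys_addr_t addr)
{
        pud_t *pud;
        pmd_t *pmd;

        /* Levels 0/1: may populate the pgd entry from the cache. */
        pud = stage2_get_pud(kvm, cache, addr);
        if (pud_none(*pud)) {
                if (!cache)
                        return NULL;
                /* Level 2: populate the pud entry with a fresh pmd table. */
                pmd = mmu_memory_cache_alloc(cache);
                pud_populate(NULL, pud, pmd);
                get_page(virt_to_page(pud));
        }

        return pmd_offset(pud, addr);
}
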
@@ -630,7 +706,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
        pmd_t *pmd;
        pte_t *pte, old_pte;
 
-       /* Create stage-2 page table mapping - Level 1 */
+       /* Create stage-2 page table mapping - Levels 0 and 1 */
        pmd = stage2_get_pmd(kvm, cache, addr);
        if (!pmd) {
                /*
@@ -675,7 +751,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
  * @size:      The size of the mapping
  */
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
-                         phys_addr_t pa, unsigned long size)
+                         phys_addr_t pa, unsigned long size, bool writable)
 {
        phys_addr_t addr, end;
        int ret = 0;
@@ -688,7 +764,11 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
        for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) {
                pte_t pte = pfn_pte(pfn, PAGE_S2_DEVICE);
 
-               ret = mmu_topup_memory_cache(&cache, 2, 2);
+               if (writable)
+                       kvm_set_s2pte_writable(&pte);
+
+               ret = mmu_topup_memory_cache(&cache, KVM_MMU_CACHE_MIN_PAGES,
+                                               KVM_NR_MEM_OBJS);
                if (ret)
                        goto out;
                spin_lock(&kvm->mmu_lock);
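The new writable parameter exists so the VM_PFNMAP path added in kvm_arch_prepare_memory_region() below can honour read-only memslots; callers that want a read-write device mapping simply pass true. A hypothetical caller, with invented guest IPA and host physical addresses, could look like this; map_example_mmio is not part of the patch:

/* Hypothetical usage; guest IPA and host physical address are invented. */
static int map_example_mmio(struct kvm *kvm)
{
        const phys_addr_t guest_ipa = 0x0a000000;       /* assumed guest IPA   */
        const phys_addr_t pa        = 0x1c010000;       /* assumed host window */

        return kvm_phys_addr_ioremap(kvm, guest_ipa, pa, SZ_64K,
                                     true /* writable */);
}
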
@@ -777,6 +857,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        /* Let's check if we will get back a huge page backed by hugetlbfs */
        down_read(&current->mm->mmap_sem);
        vma = find_vma_intersection(current->mm, hva, hva + 1);
+       if (unlikely(!vma)) {
+               kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+               up_read(&current->mm->mmap_sem);
+               return -EFAULT;
+       }
+
        if (is_vm_hugetlb_page(vma)) {
                hugetlb = true;
                gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
@@ -797,7 +883,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
        up_read(&current->mm->mmap_sem);
 
        /* We need minimum second+third level pages */
-       ret = mmu_topup_memory_cache(memcache, 2, KVM_NR_MEM_OBJS);
+       ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES,
+                                    KVM_NR_MEM_OBJS);
        if (ret)
                return ret;
 
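KVM_MMU_CACHE_MIN_PAGES replaces the hard-coded 2 so each architecture can state how many table pages a single stage2_set_pte() call may need to allocate (every level below the preallocated ones, down to the pte table). On 32-bit ARM that presumably stays at 2 (one pmd table plus one pte table); the value below is an assumption for illustration, not quoted from the patch:

/* Assumed 32-bit ARM value: one pmd table + one pte table per miss. */
#define KVM_MMU_CACHE_MIN_PAGES 2
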
@@ -843,7 +930,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                }
                coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
                ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
-                                    mem_type == PAGE_S2_DEVICE);
+                       pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
        }
 
 
@@ -916,6 +1003,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
                goto out_unlock;
        }
 
+       /* Userspace should not be able to register out-of-bounds IPAs */
+       VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+
        ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
        if (ret == 0)
                ret = 1;
@@ -1072,8 +1162,8 @@ int kvm_mmu_init(void)
                         (unsigned long)phys_base);
        }
 
-       hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
-       boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, pgd_order);
+       hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
+       boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order);
 
        if (!hyp_pgd || !boot_hyp_pgd) {
                kvm_err("Hyp mode PGD not allocated\n");
@@ -1126,13 +1216,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   const struct kvm_memory_slot *old,
                                   enum kvm_mr_change change)
 {
-       gpa_t gpa = old->base_gfn << PAGE_SHIFT;
-       phys_addr_t size = old->npages << PAGE_SHIFT;
-       if (change == KVM_MR_DELETE || change == KVM_MR_MOVE) {
-               spin_lock(&kvm->mmu_lock);
-               unmap_stage2_range(kvm, gpa, size);
-               spin_unlock(&kvm->mmu_lock);
-       }
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -1140,7 +1223,77 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
 {
-       return 0;
+       hva_t hva = mem->userspace_addr;
+       hva_t reg_end = hva + mem->memory_size;
+       bool writable = !(mem->flags & KVM_MEM_READONLY);
+       int ret = 0;
+
+       if (change != KVM_MR_CREATE && change != KVM_MR_MOVE)
+               return 0;
+
+       /*
+        * Prevent userspace from creating a memory region outside of the IPA
+        * space addressable by the KVM guest.
+        */
+       if (memslot->base_gfn + memslot->npages >=
+           (KVM_PHYS_SIZE >> PAGE_SHIFT))
+               return -EFAULT;
+
+       /*
+        * A memory region could potentially cover multiple VMAs, and any holes
+        * between them, so iterate over all of them to find out if we can map
+        * any of them right now.
+        *
+        *     +--------------------------------------------+
+        * +---------------+----------------+   +----------------+
+        * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+        * +---------------+----------------+   +----------------+
+        *     |               memory region                |
+        *     +--------------------------------------------+
+        */
+       do {
+               struct vm_area_struct *vma = find_vma(current->mm, hva);
+               hva_t vm_start, vm_end;
+
+               if (!vma || vma->vm_start >= reg_end)
+                       break;
+
+               /*
+                * Mapping a read-only VMA is only allowed if the
+                * memory region is configured as read-only.
+                */
+               if (writable && !(vma->vm_flags & VM_WRITE)) {
+                       ret = -EPERM;
+                       break;
+               }
+
+               /*
+                * Take the intersection of this VMA with the memory region
+                */
+               vm_start = max(hva, vma->vm_start);
+               vm_end = min(reg_end, vma->vm_end);
+
+               if (vma->vm_flags & VM_PFNMAP) {
+                       gpa_t gpa = mem->guest_phys_addr +
+                                   (vm_start - mem->userspace_addr);
+                       phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
+                                        vm_start - vma->vm_start;
+
+                       ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
+                                                   vm_end - vm_start,
+                                                   writable);
+                       if (ret)
+                               break;
+               }
+               hva = vm_end;
+       } while (hva < reg_end);
+
+       if (ret) {
+               spin_lock(&kvm->mmu_lock);
+               unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
+               spin_unlock(&kvm->mmu_lock);
+       }
+       return ret;
 }
 
 void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
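For VM_PFNMAP VMAs the loop above pre-populates stage 2 with device mappings, offsetting the guest IPA by how far the intersection starts into the memslot and the host physical address by how far it starts into the VMA. A worked example of that arithmetic, with invented numbers:

/*
 * Worked example (all numbers invented, 4 KB pages assumed):
 *
 *   mem->userspace_addr  = 0x40000000   mem->guest_phys_addr = 0x80000000
 *   vma->vm_start        = 0x40010000   vma->vm_pgoff        = 0x1c010
 *
 *   vm_start = max(hva, vma->vm_start)             = 0x40010000
 *   gpa = 0x80000000 + (0x40010000 - 0x40000000)   = 0x80010000
 *   pa  = (0x1c010 << PAGE_SHIFT)
 *         + (0x40010000 - 0x40010000)              = 0x1c010000
 *
 * so the device window appears in the guest at the same offset within
 * the memslot as it has within the userspace mapping.
 */
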
@@ -1165,4 +1318,10 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
                                   struct kvm_memory_slot *slot)
 {
+       gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
+       phys_addr_t size = slot->npages << PAGE_SHIFT;
+
+       spin_lock(&kvm->mmu_lock);
+       unmap_stage2_range(kvm, gpa, size);
+       spin_unlock(&kvm->mmu_lock);
 }