Merge branch 'for-linus' of git://git.samba.org/sfrench/cifs-2.6

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2024bbd..eeceeeb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,7 +35,6 @@
 #include <linux/node.h>
 #include "internal.h"
 
-const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
 unsigned long hugepages_treat_as_movable;
 
 int hugetlb_max_hstate __read_mostly;
@@ -856,7 +855,7 @@ struct hstate *size_to_hstate(unsigned long size)
        return NULL;
 }
 
-static void free_huge_page(struct page *page)
+void free_huge_page(struct page *page)
 {
        /*
         * Can't pass hstate in here because it is called from the
@@ -1089,6 +1088,9 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
        unsigned long pfn;
        struct hstate *h;
 
+       if (!hugepages_supported())
+               return;
+
        /* Set scan step to minimum hugepage size */
        for_each_hstate(h)
                if (order > huge_page_order(h))
@@ -1734,21 +1736,13 @@ static ssize_t nr_hugepages_show_common(struct kobject *kobj,
        return sprintf(buf, "%lu\n", nr_huge_pages);
 }
 
-static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
-                       struct kobject *kobj, struct kobj_attribute *attr,
-                       const char *buf, size_t len)
+static ssize_t __nr_hugepages_store_common(bool obey_mempolicy,
+                                          struct hstate *h, int nid,
+                                          unsigned long count, size_t len)
 {
        int err;
-       int nid;
-       unsigned long count;
-       struct hstate *h;
        NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
 
-       err = kstrtoul(buf, 10, &count);
-       if (err)
-               goto out;
-
-       h = kobj_to_hstate(kobj, &nid);
        if (hstate_is_gigantic(h) && !gigantic_page_supported()) {
                err = -EINVAL;
                goto out;
@@ -1784,6 +1778,23 @@ out:
        return err;
 }
 
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+                                        struct kobject *kobj, const char *buf,
+                                        size_t len)
+{
+       struct hstate *h;
+       unsigned long count;
+       int nid;
+       int err;
+
+       err = kstrtoul(buf, 10, &count);
+       if (err)
+               return err;
+
+       h = kobj_to_hstate(kobj, &nid);
+       return __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len);
+}
+
 static ssize_t nr_hugepages_show(struct kobject *kobj,
                                       struct kobj_attribute *attr, char *buf)
 {
@@ -1793,7 +1804,7 @@ static ssize_t nr_hugepages_show(struct kobject *kobj,
 static ssize_t nr_hugepages_store(struct kobject *kobj,
               struct kobj_attribute *attr, const char *buf, size_t len)
 {
-       return nr_hugepages_store_common(false, kobj, attr, buf, len);
+       return nr_hugepages_store_common(false, kobj, buf, len);
 }
 HSTATE_ATTR(nr_hugepages);
 
@@ -1812,7 +1823,7 @@ static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
 static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
               struct kobj_attribute *attr, const char *buf, size_t len)
 {
-       return nr_hugepages_store_common(true, kobj, attr, buf, len);
+       return nr_hugepages_store_common(true, kobj, buf, len);
 }
 HSTATE_ATTR(nr_hugepages_mempolicy);
 #endif
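
The refactor above separates parsing from resizing: the sysfs store handlers now only parse the user buffer and resolve the hstate/node before handing off to __nr_hugepages_store_common(). Both user-visible knobs can be exercised from userspace; a minimal sketch (run as root; the hugepages-2048kB directory assumes a 2 MB hstate on the running system):

	#include <stdio.h>

	static int write_ulong(const char *path, unsigned long val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fprintf(f, "%lu\n", val);
		return fclose(f);
	}

	int main(void)
	{
		/* per-hstate sysfs knob: lands in nr_hugepages_store() */
		write_ulong("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", 64);

		/* legacy sysctl knob: lands in hugetlb_sysctl_handler_common() */
		write_ulong("/proc/sys/vm/nr_hugepages", 64);
		return 0;
	}
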
@@ -2248,36 +2259,21 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
                         void __user *buffer, size_t *length, loff_t *ppos)
 {
        struct hstate *h = &default_hstate;
-       unsigned long tmp;
+       unsigned long tmp = h->max_huge_pages;
        int ret;
 
        if (!hugepages_supported())
                return -ENOTSUPP;
 
-       tmp = h->max_huge_pages;
-
-       if (write && hstate_is_gigantic(h) && !gigantic_page_supported())
-               return -EINVAL;
-
        table->data = &tmp;
        table->maxlen = sizeof(unsigned long);
        ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
        if (ret)
                goto out;
 
-       if (write) {
-               NODEMASK_ALLOC(nodemask_t, nodes_allowed,
-                                               GFP_KERNEL | __GFP_NORETRY);
-               if (!(obey_mempolicy &&
-                              init_nodemask_of_mempolicy(nodes_allowed))) {
-                       NODEMASK_FREE(nodes_allowed);
-                       nodes_allowed = &node_states[N_MEMORY];
-               }
-               h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
-
-               if (nodes_allowed != &node_states[N_MEMORY])
-                       NODEMASK_FREE(nodes_allowed);
-       }
+       if (write)
+               ret = __nr_hugepages_store_common(obey_mempolicy, h,
+                                                 NUMA_NO_NODE, tmp, *length);
 out:
        return ret;
 }
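
After this hunk every write path converges on the same helper; the resulting call graph, roughly:

	/*
	 * sysfs:  nr_hugepages_store()            parses via kstrtoul(),
	 *         nr_hugepages_mempolicy_store()  resolves h/nid via kobj_to_hstate()
	 * sysctl: hugetlb_sysctl_handler_common() parses via proc_doulongvec_minmax()
	 *         and passes nid == NUMA_NO_NODE
	 *
	 * Both feed __nr_hugepages_store_common(obey_mempolicy, h, nid, count, len),
	 * which picks nodes_allowed and calls set_max_huge_pages().
	 */
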
@@ -2604,6 +2600,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                } else {
                        if (cow)
                                huge_ptep_set_wrprotect(src, addr, src_pte);
+                       entry = huge_ptep_get(src_pte);
                        ptepage = pte_page(entry);
                        get_page(ptepage);
                        page_dup_rmap(ptepage);
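
The added re-read is the point of this hunk: huge_ptep_set_wrprotect() modifies the pte in place, so an `entry` loaded before it is stale and the child would be handed the still-writable value. Simplified excerpt of the fixed flow (the surrounding copy loop is elided):

	entry = huge_ptep_get(src_pte);		/* may still be writable */
	if (cow)
		huge_ptep_set_wrprotect(src, addr, src_pte);
	entry = huge_ptep_get(src_pte);		/* re-read the now write-protected pte */
	set_huge_pte_at(dst, addr, dst_pte, entry);
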
@@ -2753,8 +2750,8 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
-static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
-                               struct page *page, unsigned long address)
+static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+                             struct page *page, unsigned long address)
 {
        struct hstate *h = hstate_vma(vma);
        struct vm_area_struct *iter_vma;
@@ -2793,8 +2790,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                                             address + huge_page_size(h), page);
        }
        mutex_unlock(&mapping->i_mmap_mutex);
-
-       return 1;
 }
 
 /*
@@ -2809,7 +2804,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct hstate *h = hstate_vma(vma);
        struct page *old_page, *new_page;
-       int outside_reserve = 0;
+       int ret = 0, outside_reserve = 0;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
 
@@ -2839,14 +2834,14 @@ retry_avoidcopy:
 
        page_cache_get(old_page);
 
-       /* Drop page table lock as buddy allocator may be called */
+       /*
+        * Drop page table lock as buddy allocator may be called. It will
+        * be acquired again before returning to the caller, as expected.
+        */
        spin_unlock(ptl);
        new_page = alloc_huge_page(vma, address, outside_reserve);
 
        if (IS_ERR(new_page)) {
-               long err = PTR_ERR(new_page);
-               page_cache_release(old_page);
-
                /*
                 * If a process owning a MAP_PRIVATE mapping fails to COW,
                 * it is due to references held by a child and an insufficient
@@ -2855,29 +2850,25 @@ retry_avoidcopy:
                 * may get SIGKILLed if it later faults.
                 */
                if (outside_reserve) {
+                       page_cache_release(old_page);
                        BUG_ON(huge_pte_none(pte));
-                       if (unmap_ref_private(mm, vma, old_page, address)) {
-                               BUG_ON(huge_pte_none(pte));
-                               spin_lock(ptl);
-                               ptep = huge_pte_offset(mm, address & huge_page_mask(h));
-                               if (likely(ptep &&
-                                          pte_same(huge_ptep_get(ptep), pte)))
-                                       goto retry_avoidcopy;
-                               /*
-                                * race occurs while re-acquiring page table
-                                * lock, and our job is done.
-                                */
-                               return 0;
-                       }
-                       WARN_ON_ONCE(1);
+                       unmap_ref_private(mm, vma, old_page, address);
+                       BUG_ON(huge_pte_none(pte));
+                       spin_lock(ptl);
+                       ptep = huge_pte_offset(mm, address & huge_page_mask(h));
+                       if (likely(ptep &&
+                                  pte_same(huge_ptep_get(ptep), pte)))
+                               goto retry_avoidcopy;
+                       /*
+                        * A race occurred while re-acquiring the page
+                        * table lock; our job is done.
+                        */
+                       return 0;
                }
 
-               /* Caller expects lock to be held */
-               spin_lock(ptl);
-               if (err == -ENOMEM)
-                       return VM_FAULT_OOM;
-               else
-                       return VM_FAULT_SIGBUS;
+               ret = (PTR_ERR(new_page) == -ENOMEM) ?
+                       VM_FAULT_OOM : VM_FAULT_SIGBUS;
+               goto out_release_old;
        }
 
        /*
@@ -2885,11 +2876,8 @@ retry_avoidcopy:
         * anon_vma prepared.
         */
        if (unlikely(anon_vma_prepare(vma))) {
-               page_cache_release(new_page);
-               page_cache_release(old_page);
-               /* Caller expects lock to be held */
-               spin_lock(ptl);
-               return VM_FAULT_OOM;
+               ret = VM_FAULT_OOM;
+               goto out_release_all;
        }
 
        copy_user_huge_page(new_page, old_page, address, vma,
@@ -2899,6 +2887,7 @@ retry_avoidcopy:
        mmun_start = address & huge_page_mask(h);
        mmun_end = mmun_start + huge_page_size(h);
        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
        /*
         * Retake the page table lock to check for racing updates
         * before the page tables are altered
@@ -2919,12 +2908,13 @@ retry_avoidcopy:
        }
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+out_release_all:
        page_cache_release(new_page);
+out_release_old:
        page_cache_release(old_page);
 
-       /* Caller expects lock to be held */
-       spin_lock(ptl);
-       return 0;
+       spin_lock(ptl); /* Caller expects lock to be held */
+       return ret;
 }
 
 /* Return the pagecache page at a given address within a VMA */
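
With the goto-based unwind in hugetlb_cow(), each error path releases exactly what it holds; in sketch form:

	/*
	 * goto out_release_old:  new_page was never obtained, drop old_page only
	 * goto out_release_all:  drop new_page, then fall through to old_page
	 *
	 * The success path falls through both labels with ret == 0, and the
	 * ptl is re-taken last so the caller always sees the lock held.
	 */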