diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 18766f6..fdf8da9 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -547,7 +547,11 @@ struct hv_dynmem_device {
         */
        struct task_struct *thread;
 
-       struct mutex ha_region_mutex;
+       /*
+        * Protects ha_region_list, num_pages_onlined counter and individual
+        * regions from ha_region_list.
+        */
+       spinlock_t ha_lock;
 
        /*
         * A list of hot-add regions.
@@ -571,18 +575,14 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
                              void *v)
 {
        struct memory_notify *mem = (struct memory_notify *)v;
+       unsigned long flags;
 
        switch (val) {
-       case MEM_GOING_ONLINE:
-               mutex_lock(&dm_device.ha_region_mutex);
-               break;
-
        case MEM_ONLINE:
+               spin_lock_irqsave(&dm_device.ha_lock, flags);
                dm_device.num_pages_onlined += mem->nr_pages;
+               spin_unlock_irqrestore(&dm_device.ha_lock, flags);
        case MEM_CANCEL_ONLINE:
-               if (val == MEM_ONLINE ||
-                   mutex_is_locked(&dm_device.ha_region_mutex))
-                       mutex_unlock(&dm_device.ha_region_mutex);
                if (dm_device.ha_waiting) {
                        dm_device.ha_waiting = false;
                        complete(&dm_device.ol_waitevent);
@@ -590,10 +590,11 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
                break;
 
        case MEM_OFFLINE:
-               mutex_lock(&dm_device.ha_region_mutex);
+               spin_lock_irqsave(&dm_device.ha_lock, flags);
                dm_device.num_pages_onlined -= mem->nr_pages;
-               mutex_unlock(&dm_device.ha_region_mutex);
+               spin_unlock_irqrestore(&dm_device.ha_lock, flags);
                break;
+       case MEM_GOING_ONLINE:
        case MEM_GOING_OFFLINE:
        case MEM_CANCEL_OFFLINE:
                break;
@@ -657,9 +658,12 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
        unsigned long start_pfn;
        unsigned long processed_pfn;
        unsigned long total_pfn = pfn_count;
+       unsigned long flags;
 
        for (i = 0; i < (size/HA_CHUNK); i++) {
                start_pfn = start + (i * HA_CHUNK);
+
+               spin_lock_irqsave(&dm_device.ha_lock, flags);
                has->ha_end_pfn +=  HA_CHUNK;
 
                if (total_pfn > HA_CHUNK) {
@@ -671,11 +675,11 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
                }
 
                has->covered_end_pfn +=  processed_pfn;
+               spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
                init_completion(&dm_device.ol_waitevent);
-               dm_device.ha_waiting = true;
+               dm_device.ha_waiting = !memhp_auto_online;
 
-               mutex_unlock(&dm_device.ha_region_mutex);
                nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
                ret = add_memory(nid, PFN_PHYS((start_pfn)),
                                (HA_CHUNK << PAGE_SHIFT));
@@ -692,20 +696,23 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
                                 */
                                do_hot_add = false;
                        }
+                       spin_lock_irqsave(&dm_device.ha_lock, flags);
                        has->ha_end_pfn -= HA_CHUNK;
                        has->covered_end_pfn -=  processed_pfn;
-                       mutex_lock(&dm_device.ha_region_mutex);
+                       spin_unlock_irqrestore(&dm_device.ha_lock, flags);
                        break;
                }
 
                /*
-                * Wait for the memory block to be onlined.
-                * Since the hot add has succeeded, it is ok to
-                * proceed even if the pages in the hot added region
-                * have not been "onlined" within the allowed time.
+                * Wait for the memory block to be onlined when memory onlining
+                * is done outside of kernel (memhp_auto_online). Since the hot
+                * add has succeeded, it is ok to proceed even if the pages in
+                * the hot added region have not been "onlined" within the
+                * allowed time.
                 */
-               wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-               mutex_lock(&dm_device.ha_region_mutex);
+               if (dm_device.ha_waiting)
+                       wait_for_completion_timeout(&dm_device.ol_waitevent,
+                                                   5*HZ);
                post_status(&dm_device);
        }
 
@@ -714,13 +721,13 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 
 static void hv_online_page(struct page *pg)
 {
-       struct list_head *cur;
        struct hv_hotadd_state *has;
        unsigned long cur_start_pgp;
        unsigned long cur_end_pgp;
+       unsigned long flags;
 
-       list_for_each(cur, &dm_device.ha_region_list) {
-               has = list_entry(cur, struct hv_hotadd_state, list);
+       spin_lock_irqsave(&dm_device.ha_lock, flags);
+       list_for_each_entry(has, &dm_device.ha_region_list, list) {
                cur_start_pgp = (unsigned long)
                        pfn_to_page(has->start_pfn);
                cur_end_pgp = (unsigned long)pfn_to_page(has->end_pfn);
@@ -733,21 +740,19 @@ static void hv_online_page(struct page *pg)
                hv_page_online_one(has, pg);
                break;
        }
+       spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 }
 
 static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
 {
-       struct list_head *cur;
        struct hv_hotadd_state *has;
        struct hv_hotadd_gap *gap;
        unsigned long residual, new_inc;
+       int ret = 0;
+       unsigned long flags;
 
-       if (list_empty(&dm_device.ha_region_list))
-               return false;
-
-       list_for_each(cur, &dm_device.ha_region_list) {
-               has = list_entry(cur, struct hv_hotadd_state, list);
-
+       spin_lock_irqsave(&dm_device.ha_lock, flags);
+       list_for_each_entry(has, &dm_device.ha_region_list, list) {
                /*
                 * If the pfn range we are dealing with is not in the current
                 * "hot add block", move on.
@@ -761,8 +766,10 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
                 */
                if (has->covered_end_pfn != start_pfn) {
                        gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
-                       if (!gap)
-                               return -ENOMEM;
+                       if (!gap) {
+                               ret = -ENOMEM;
+                               break;
+                       }
 
                        INIT_LIST_HEAD(&gap->list);
                        gap->start_pfn = has->covered_end_pfn;
@@ -788,10 +795,12 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
                        has->end_pfn += new_inc;
                }
 
-               return 1;
+               ret = 1;
+               break;
        }
+       spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
-       return 0;
+       return ret;
 }
 
 static unsigned long handle_pg_range(unsigned long pg_start,
@@ -800,17 +809,13 @@ static unsigned long handle_pg_range(unsigned long pg_start,
        unsigned long start_pfn = pg_start;
        unsigned long pfn_cnt = pg_count;
        unsigned long size;
-       struct list_head *cur;
        struct hv_hotadd_state *has;
        unsigned long pgs_ol = 0;
        unsigned long old_covered_state;
+       unsigned long res = 0, flags;
 
-       if (list_empty(&dm_device.ha_region_list))
-               return 0;
-
-       list_for_each(cur, &dm_device.ha_region_list) {
-               has = list_entry(cur, struct hv_hotadd_state, list);
-
+       spin_lock_irqsave(&dm_device.ha_lock, flags);
+       list_for_each_entry(has, &dm_device.ha_region_list, list) {
                /*
                 * If the pfn range we are dealing with is not in the current
                 * "hot add block", move on.
@@ -860,17 +865,20 @@ static unsigned long handle_pg_range(unsigned long pg_start,
                        } else {
                                pfn_cnt = size;
                        }
+                       spin_unlock_irqrestore(&dm_device.ha_lock, flags);
                        hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
+                       spin_lock_irqsave(&dm_device.ha_lock, flags);
                }
                /*
                 * If we managed to online any pages that were given to us,
                 * we declare success.
                 */
-               return has->covered_end_pfn - old_covered_state;
-
+               res = has->covered_end_pfn - old_covered_state;
+               break;
        }
+       spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
-       return 0;
+       return res;
 }
 
 static unsigned long process_hot_add(unsigned long pg_start,
@@ -880,6 +888,7 @@ static unsigned long process_hot_add(unsigned long pg_start,
 {
        struct hv_hotadd_state *ha_region = NULL;
        int covered;
+       unsigned long flags;
 
        if (pfn_cnt == 0)
                return 0;
@@ -905,12 +914,15 @@ static unsigned long process_hot_add(unsigned long pg_start,
                INIT_LIST_HEAD(&ha_region->list);
                INIT_LIST_HEAD(&ha_region->gap_list);
 
-               list_add_tail(&ha_region->list, &dm_device.ha_region_list);
                ha_region->start_pfn = rg_start;
                ha_region->ha_end_pfn = rg_start;
                ha_region->covered_start_pfn = pg_start;
                ha_region->covered_end_pfn = pg_start;
                ha_region->end_pfn = rg_start + rg_size;
+
+               spin_lock_irqsave(&dm_device.ha_lock, flags);
+               list_add_tail(&ha_region->list, &dm_device.ha_region_list);
+               spin_unlock_irqrestore(&dm_device.ha_lock, flags);
        }
 
 do_pg_range:
@@ -937,7 +949,6 @@ static void hot_add_req(struct work_struct *dummy)
        resp.hdr.size = sizeof(struct dm_hot_add_response);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-       mutex_lock(&dm_device.ha_region_mutex);
        pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
        pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
 
@@ -971,7 +982,6 @@ static void hot_add_req(struct work_struct *dummy)
                                                rg_start, rg_sz);
 
        dm->num_pages_added += resp.page_count;
-       mutex_unlock(&dm_device.ha_region_mutex);
 #endif
        /*
         * The result field of the response structure has the
@@ -1065,7 +1075,6 @@ static unsigned long compute_balloon_floor(void)
 static void post_status(struct hv_dynmem_device *dm)
 {
        struct dm_status status;
-       struct sysinfo val;
        unsigned long now = jiffies;
        unsigned long last_post = last_post_time;
 
@@ -1077,7 +1086,6 @@ static void post_status(struct hv_dynmem_device *dm)
        if (!time_after(now, (last_post_time + HZ)))
                return;
 
-       si_meminfo(&val);
        memset(&status, 0, sizeof(struct dm_status));
        status.hdr.type = DM_STATUS_REPORT;
        status.hdr.size = sizeof(struct dm_status);
@@ -1093,7 +1101,7 @@ static void post_status(struct hv_dynmem_device *dm)
         * num_pages_onlined) as committed to the host, otherwise it can try
         * asking us to balloon them out.
         */
-       status.num_avail = val.freeram;
+       status.num_avail = si_mem_available();
        status.num_committed = vm_memory_committed() +
                dm->num_pages_ballooned +
                (dm->num_pages_added > dm->num_pages_onlined ?
@@ -1199,7 +1207,7 @@ static void balloon_up(struct work_struct *dummy)
        int ret;
        bool done = false;
        int i;
-       struct sysinfo val;
+       long avail_pages;
        unsigned long floor;
 
        /* The host balloons pages in 2M granularity. */
@@ -1211,12 +1219,12 @@ static void balloon_up(struct work_struct *dummy)
         */
        alloc_unit = 512;
 
-       si_meminfo(&val);
+       avail_pages = si_mem_available();
        floor = compute_balloon_floor();
 
        /* Refuse to balloon below the floor, keep the 2M granularity. */
-       if (val.freeram < num_pages || val.freeram - num_pages < floor) {
-               num_pages = val.freeram > floor ? (val.freeram - floor) : 0;
+       if (avail_pages < num_pages || avail_pages - num_pages < floor) {
+               num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
                num_pages -= num_pages % PAGES_IN_2M;
        }
 
@@ -1227,7 +1235,6 @@ static void balloon_up(struct work_struct *dummy)
                bl_resp->hdr.size = sizeof(struct dm_balloon_response);
                bl_resp->more_pages = 1;
 
-
                num_pages -= num_ballooned;
                num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
                                                    bl_resp, alloc_unit);
@@ -1516,7 +1523,7 @@ static int balloon_probe(struct hv_device *dev,
        init_completion(&dm_device.host_event);
        init_completion(&dm_device.config_event);
        INIT_LIST_HEAD(&dm_device.ha_region_list);
-       mutex_init(&dm_device.ha_region_mutex);
+       spin_lock_init(&dm_device.ha_lock);
        INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
        INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
        dm_device.host_specified_ha_region = false;
@@ -1635,9 +1642,9 @@ probe_error0:
 static int balloon_remove(struct hv_device *dev)
 {
        struct hv_dynmem_device *dm = hv_get_drvdata(dev);
-       struct list_head *cur, *tmp;
-       struct hv_hotadd_state *has;
+       struct hv_hotadd_state *has, *tmp;
        struct hv_hotadd_gap *gap, *tmp_gap;
+       unsigned long flags;
 
        if (dm->num_pages_ballooned != 0)
                pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);
@@ -1652,8 +1659,8 @@ static int balloon_remove(struct hv_device *dev)
        restore_online_page_callback(&hv_online_page);
        unregister_memory_notifier(&hv_memory_nb);
 #endif
-       list_for_each_safe(cur, tmp, &dm->ha_region_list) {
-               has = list_entry(cur, struct hv_hotadd_state, list);
+       spin_lock_irqsave(&dm_device.ha_lock, flags);
+       list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
                list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) {
                        list_del(&gap->list);
                        kfree(gap);
@@ -1661,6 +1668,7 @@ static int balloon_remove(struct hv_device *dev)
                list_del(&has->list);
                kfree(has);
        }
+       spin_unlock_irqrestore(&dm_device.ha_lock, flags);
 
        return 0;
 }
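
The core of the change above is the switch from the sleeping ha_region_mutex to ha_lock, a spinlock taken with spin_lock_irqsave(), with the lock dropped across the calls that may sleep: hv_mem_hot_add() releases it before add_memory(), and handle_pg_range() releases it around hv_mem_hot_add(). The sketch below is a minimal, hypothetical kernel module (the demo_* names are illustrative and do not exist in hv_balloon.c) showing that pattern: a spinlock guards a region list so it can be walked from atomic context, and the lock is dropped before anything that can block.

// SPDX-License-Identifier: GPL-2.0
/*
 * Hypothetical demo module, not part of hv_balloon.c: a spinlock protects a
 * list of regions, and the lock is never held across a sleeping call.
 */
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/delay.h>

struct demo_region {
	struct list_head list;
	unsigned long start_pfn;
	unsigned long end_pfn;
};

static LIST_HEAD(demo_region_list);
static DEFINE_SPINLOCK(demo_lock);

/* Safe from any context, including atomic: irqs are saved around the walk. */
static unsigned long demo_count_pfns(void)
{
	struct demo_region *reg;
	unsigned long flags, total = 0;

	spin_lock_irqsave(&demo_lock, flags);
	list_for_each_entry(reg, &demo_region_list, list)
		total += reg->end_pfn - reg->start_pfn;
	spin_unlock_irqrestore(&demo_lock, flags);

	return total;
}

/* Process context: update the region under the lock, then sleep outside it. */
static void demo_grow_region(struct demo_region *reg, unsigned long pfns)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_lock, flags);
	reg->end_pfn += pfns;
	spin_unlock_irqrestore(&demo_lock, flags);

	/* Stand-in for a sleeping call such as add_memory(). */
	msleep(10);
}

static int __init demo_init(void)
{
	struct demo_region *reg = kzalloc(sizeof(*reg), GFP_KERNEL);
	unsigned long flags;

	if (!reg)
		return -ENOMEM;

	reg->start_pfn = 0;
	reg->end_pfn = 0;
	spin_lock_irqsave(&demo_lock, flags);
	list_add_tail(&reg->list, &demo_region_list);
	spin_unlock_irqrestore(&demo_lock, flags);

	demo_grow_region(reg, 512);
	pr_info("demo: %lu pfns tracked\n", demo_count_pfns());
	return 0;
}

static void __exit demo_exit(void)
{
	struct demo_region *reg, *tmp;
	unsigned long flags;

	/* kfree() does not sleep, so it is safe under the spinlock. */
	spin_lock_irqsave(&demo_lock, flags);
	list_for_each_entry_safe(reg, tmp, &demo_region_list, list) {
		list_del(&reg->list);
		kfree(reg);
	}
	spin_unlock_irqrestore(&demo_lock, flags);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");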