Merge branch 'for-4.9/libnvdimm' into libnvdimm-for-next
authorDan Williams <dan.j.williams@intel.com>
Fri, 7 Oct 2016 23:46:24 +0000 (16:46 -0700)
committerDan Williams <dan.j.williams@intel.com>
Fri, 7 Oct 2016 23:46:24 +0000 (16:46 -0700)
20 files changed:
drivers/acpi/nfit/core.c
drivers/acpi/nfit/mce.c
drivers/acpi/nfit/nfit.h
drivers/nvdimm/bus.c
drivers/nvdimm/core.c
drivers/nvdimm/dimm.c
drivers/nvdimm/dimm_devs.c
drivers/nvdimm/label.c
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/nd-core.h
drivers/nvdimm/nd.h
drivers/nvdimm/pmem.c
drivers/nvdimm/region_devs.c
include/linux/libnvdimm.h
include/linux/nd.h
include/uapi/linux/ndctl.h
tools/testing/nvdimm/Kbuild
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c
tools/testing/nvdimm/test/nfit_test.h

index e1d5ea6..71a7d07 100644 (file)
@@ -886,6 +886,58 @@ static ssize_t revision_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(revision);
 
+static ssize_t hw_error_scrub_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+       struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+       struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+       return sprintf(buf, "%d\n", acpi_desc->scrub_mode);
+}
+
+/*
+ * The 'hw_error_scrub' attribute can have the following values written to it:
+ * '0': Switch to the default mode where an exception will only insert
+ *      the address of the memory error into the poison and badblocks lists.
+ * '1': Enable a full scrub to happen if an exception for a memory error is
+ *      received.
+ */
+static ssize_t hw_error_scrub_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t size)
+{
+       struct nvdimm_bus_descriptor *nd_desc;
+       ssize_t rc;
+       long val;
+
+       rc = kstrtol(buf, 0, &val);
+       if (rc)
+               return rc;
+
+       device_lock(dev);
+       nd_desc = dev_get_drvdata(dev);
+       if (nd_desc) {
+               struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+               switch (val) {
+               case HW_ERROR_SCRUB_ON:
+                       acpi_desc->scrub_mode = HW_ERROR_SCRUB_ON;
+                       break;
+               case HW_ERROR_SCRUB_OFF:
+                       acpi_desc->scrub_mode = HW_ERROR_SCRUB_OFF;
+                       break;
+               default:
+                       rc = -EINVAL;
+                       break;
+               }
+       }
+       device_unlock(dev);
+       if (rc)
+               return rc;
+       return size;
+}
+static DEVICE_ATTR_RW(hw_error_scrub);
+
 /*
  * This shows the number of full Address Range Scrubs that have been
  * completed since driver load time. Userspace can wait on this using
@@ -958,6 +1010,7 @@ static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
 static struct attribute *acpi_nfit_attributes[] = {
        &dev_attr_revision.attr,
        &dev_attr_scrub.attr,
+       &dev_attr_hw_error_scrub.attr,
        NULL,
 };
 
@@ -1256,6 +1309,44 @@ static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
        return NULL;
 }
 
+void __acpi_nvdimm_notify(struct device *dev, u32 event)
+{
+       struct nfit_mem *nfit_mem;
+       struct acpi_nfit_desc *acpi_desc;
+
+       dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
+                       event);
+
+       if (event != NFIT_NOTIFY_DIMM_HEALTH) {
+               dev_dbg(dev->parent, "%s: unknown event: %d\n", dev_name(dev),
+                               event);
+               return;
+       }
+
+       acpi_desc = dev_get_drvdata(dev->parent);
+       if (!acpi_desc)
+               return;
+
+       /*
+        * If we successfully retrieved acpi_desc, then we know nfit_mem data
+        * is still valid.
+        */
+       nfit_mem = dev_get_drvdata(dev);
+       if (nfit_mem && nfit_mem->flags_attr)
+               sysfs_notify_dirent(nfit_mem->flags_attr);
+}
+EXPORT_SYMBOL_GPL(__acpi_nvdimm_notify);
+
+static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
+{
+       struct acpi_device *adev = data;
+       struct device *dev = &adev->dev;
+
+       device_lock(dev->parent);
+       __acpi_nvdimm_notify(dev, event);
+       device_unlock(dev->parent);
+}
+
 static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                struct nfit_mem *nfit_mem, u32 device_handle)
 {
@@ -1280,6 +1371,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                return force_enable_dimms ? 0 : -ENODEV;
        }
 
+       if (ACPI_FAILURE(acpi_install_notify_handler(adev_dimm->handle,
+               ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify, adev_dimm))) {
+               dev_err(dev, "%s: notification registration failed\n",
+                               dev_name(&adev_dimm->dev));
+               return -ENXIO;
+       }
+
        /*
         * Until standardization materializes we need to consider 4
         * different command sets.  Note, that checking for function0 (bit0)
@@ -1318,18 +1416,41 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
        return 0;
 }
 
+static void shutdown_dimm_notify(void *data)
+{
+       struct acpi_nfit_desc *acpi_desc = data;
+       struct nfit_mem *nfit_mem;
+
+       mutex_lock(&acpi_desc->init_mutex);
+       /*
+        * Clear out the nfit_mem->flags_attr and shut down dimm event
+        * notifications.
+        */
+       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+               struct acpi_device *adev_dimm = nfit_mem->adev;
+
+               if (nfit_mem->flags_attr) {
+                       sysfs_put(nfit_mem->flags_attr);
+                       nfit_mem->flags_attr = NULL;
+               }
+               if (adev_dimm)
+                       acpi_remove_notify_handler(adev_dimm->handle,
+                                       ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+       }
+       mutex_unlock(&acpi_desc->init_mutex);
+}
+
 static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 {
        struct nfit_mem *nfit_mem;
-       int dimm_count = 0;
+       int dimm_count = 0, rc;
+       struct nvdimm *nvdimm;
 
        list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
                struct acpi_nfit_flush_address *flush;
                unsigned long flags = 0, cmd_mask;
-               struct nvdimm *nvdimm;
                u32 device_handle;
                u16 mem_flags;
-               int rc;
 
                device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
                nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
@@ -1382,7 +1503,30 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 
        }
 
-       return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
+       rc = nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
+       if (rc)
+               return rc;
+
+       /*
+        * Now that dimms are successfully registered, and async registration
+        * is flushed, attempt to enable event notification.
+        */
+       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+               struct kernfs_node *nfit_kernfs;
+
+               nvdimm = nfit_mem->nvdimm;
+               nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
+               if (nfit_kernfs)
+                       nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
+                                       "flags");
+               sysfs_put(nfit_kernfs);
+               if (!nfit_mem->flags_attr)
+                       dev_warn(acpi_desc->dev, "%s: notifications disabled\n",
+                                       nvdimm_name(nvdimm));
+       }
+
+       return devm_add_action_or_reset(acpi_desc->dev, shutdown_dimm_notify,
+                       acpi_desc);
 }
 
 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
@@ -1491,9 +1635,9 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
        if (!info)
                return -ENOMEM;
        for (i = 0; i < nr; i++) {
-               struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
+               struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
                struct nfit_set_info_map *map = &info->mapping[i];
-               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+               struct nvdimm *nvdimm = mapping->nvdimm;
                struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
                struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
                                spa->range_index, i);
@@ -1917,7 +2061,7 @@ static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
 }
 
 static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
-               struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
+               struct nd_mapping_desc *mapping, struct nd_region_desc *ndr_desc,
                struct acpi_nfit_memory_map *memdev,
                struct nfit_spa *nfit_spa)
 {
@@ -1934,12 +2078,12 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
                return -ENODEV;
        }
 
-       nd_mapping->nvdimm = nvdimm;
+       mapping->nvdimm = nvdimm;
        switch (nfit_spa_type(spa)) {
        case NFIT_SPA_PM:
        case NFIT_SPA_VOLATILE:
-               nd_mapping->start = memdev->address;
-               nd_mapping->size = memdev->region_size;
+               mapping->start = memdev->address;
+               mapping->size = memdev->region_size;
                break;
        case NFIT_SPA_DCR:
                nfit_mem = nvdimm_provider_data(nvdimm);
@@ -1947,13 +2091,13 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
                        dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
                                        spa->range_index, nvdimm_name(nvdimm));
                } else {
-                       nd_mapping->size = nfit_mem->bdw->capacity;
-                       nd_mapping->start = nfit_mem->bdw->start_address;
+                       mapping->size = nfit_mem->bdw->capacity;
+                       mapping->start = nfit_mem->bdw->start_address;
                        ndr_desc->num_lanes = nfit_mem->bdw->windows;
                        blk_valid = 1;
                }
 
-               ndr_desc->nd_mapping = nd_mapping;
+               ndr_desc->mapping = mapping;
                ndr_desc->num_mappings = blk_valid;
                ndbr_desc = to_blk_region_desc(ndr_desc);
                ndbr_desc->enable = acpi_nfit_blk_region_enable;
@@ -1979,7 +2123,7 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
 static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
                struct nfit_spa *nfit_spa)
 {
-       static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
+       static struct nd_mapping_desc mappings[ND_MAX_MAPPINGS];
        struct acpi_nfit_system_address *spa = nfit_spa->spa;
        struct nd_blk_region_desc ndbr_desc;
        struct nd_region_desc *ndr_desc;
@@ -1998,7 +2142,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
        }
 
        memset(&res, 0, sizeof(res));
-       memset(&nd_mappings, 0, sizeof(nd_mappings));
+       memset(&mappings, 0, sizeof(mappings));
        memset(&ndbr_desc, 0, sizeof(ndbr_desc));
        res.start = spa->address;
        res.end = res.start + spa->length - 1;
@@ -2014,7 +2158,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 
        list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
                struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
-               struct nd_mapping *nd_mapping;
+               struct nd_mapping_desc *mapping;
 
                if (memdev->range_index != spa->range_index)
                        continue;
@@ -2023,14 +2167,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
                                        spa->range_index, ND_MAX_MAPPINGS);
                        return -ENXIO;
                }
-               nd_mapping = &nd_mappings[count++];
-               rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
+               mapping = &mappings[count++];
+               rc = acpi_nfit_init_mapping(acpi_desc, mapping, ndr_desc,
                                memdev, nfit_spa);
                if (rc)
                        goto out;
        }
 
-       ndr_desc->nd_mapping = nd_mappings;
+       ndr_desc->mapping = mappings;
        ndr_desc->num_mappings = count;
        rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
        if (rc)
@@ -2678,29 +2822,30 @@ static int acpi_nfit_remove(struct acpi_device *adev)
        return 0;
 }
 
-static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
+void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
 {
-       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
+       struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
        struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
-       struct device *dev = &adev->dev;
        union acpi_object *obj;
        acpi_status status;
        int ret;
 
        dev_dbg(dev, "%s: event: %d\n", __func__, event);
 
-       device_lock(dev);
+       if (event != NFIT_NOTIFY_UPDATE)
+               return;
+
        if (!dev->driver) {
                /* dev->driver may be null if we're being removed */
                dev_dbg(dev, "%s: no driver found for dev\n", __func__);
-               goto out_unlock;
+               return;
        }
 
        if (!acpi_desc) {
                acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
                if (!acpi_desc)
-                       goto out_unlock;
-               acpi_nfit_desc_init(acpi_desc, &adev->dev);
+                       return;
+               acpi_nfit_desc_init(acpi_desc, dev);
        } else {
                /*
                 * Finish previous registration before considering new
@@ -2710,10 +2855,10 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
        }
 
        /* Evaluate _FIT */
-       status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
+       status = acpi_evaluate_object(handle, "_FIT", NULL, &buf);
        if (ACPI_FAILURE(status)) {
                dev_err(dev, "failed to evaluate _FIT\n");
-               goto out_unlock;
+               return;
        }
 
        obj = buf.pointer;
@@ -2725,9 +2870,14 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
        } else
                dev_err(dev, "Invalid _FIT\n");
        kfree(buf.pointer);
+}
+EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
 
- out_unlock:
-       device_unlock(dev);
+static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
+{
+       device_lock(&adev->dev);
+       __acpi_nfit_notify(&adev->dev, adev->handle, event);
+       device_unlock(&adev->dev);
 }
 
 static const struct acpi_device_id acpi_nfit_ids[] = {
index 161f915..e5ce81c 100644 (file)
@@ -14,6 +14,7 @@
  */
 #include <linux/notifier.h>
 #include <linux/acpi.h>
+#include <linux/nd.h>
 #include <asm/mce.h>
 #include "nfit.h"
 
@@ -62,12 +63,25 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
                }
                mutex_unlock(&acpi_desc->init_mutex);
 
-               /*
-                * We can ignore an -EBUSY here because if an ARS is already
-                * in progress, just let that be the last authoritative one
-                */
-               if (found_match)
+               if (!found_match)
+                       continue;
+
+               /* If this fails due to an -ENOMEM, there is little we can do */
+               nvdimm_bus_add_poison(acpi_desc->nvdimm_bus,
+                               ALIGN(mce->addr, L1_CACHE_BYTES),
+                               L1_CACHE_BYTES);
+               nvdimm_region_notify(nfit_spa->nd_region,
+                               NVDIMM_REVALIDATE_POISON);
+
+               if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
+                       /*
+                        * We can ignore an -EBUSY here because if an ARS is
+                        * already in progress, just let that be the last
+                        * authoritative one
+                        */
                        acpi_nfit_ars_rescan(acpi_desc);
+               }
+               break;
        }
 
        mutex_unlock(&acpi_desc_lock);
index e894ded..14296f5 100644 (file)
@@ -78,6 +78,14 @@ enum {
        NFIT_ARS_TIMEOUT = 90,
 };
 
+enum nfit_root_notifiers {
+       NFIT_NOTIFY_UPDATE = 0x80,
+};
+
+enum nfit_dimm_notifiers {
+       NFIT_NOTIFY_DIMM_HEALTH = 0x81,
+};
+
 struct nfit_spa {
        struct list_head list;
        struct nd_region *nd_region;
@@ -124,6 +132,7 @@ struct nfit_mem {
        struct acpi_nfit_system_address *spa_bdw;
        struct acpi_nfit_interleave *idt_dcr;
        struct acpi_nfit_interleave *idt_bdw;
+       struct kernfs_node *flags_attr;
        struct nfit_flush *nfit_flush;
        struct list_head list;
        struct acpi_device *adev;
@@ -152,6 +161,7 @@ struct acpi_nfit_desc {
        struct list_head list;
        struct kernfs_node *scrub_count_state;
        unsigned int scrub_count;
+       unsigned int scrub_mode;
        unsigned int cancel:1;
        unsigned long dimm_cmd_force_en;
        unsigned long bus_cmd_force_en;
@@ -159,6 +169,11 @@ struct acpi_nfit_desc {
                        void *iobuf, u64 len, int rw);
 };
 
+enum scrub_mode {
+       HW_ERROR_SCRUB_OFF,
+       HW_ERROR_SCRUB_ON,
+};
+
 enum nd_blk_mmio_selector {
        BDW,
        DCR,
@@ -223,5 +238,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
 
 const u8 *to_nfit_uuid(enum nfit_uuids id);
 int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
+void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
+void __acpi_nvdimm_notify(struct device *dev, u32 event);
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
 #endif /* __NFIT_H__ */
index 935866f..a8b6949 100644 (file)
@@ -217,6 +217,8 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
                return rc;
        if (cmd_rc < 0)
                return cmd_rc;
+
+       nvdimm_clear_from_poison_list(nvdimm_bus, phys, len);
        return clear_err.cleared;
 }
 EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
index 4d7bbd2..7ceba08 100644 (file)
@@ -547,11 +547,12 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
 }
 EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
 
-static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
+                       gfp_t flags)
 {
        struct nd_poison *pl;
 
-       pl = kzalloc(sizeof(*pl), GFP_KERNEL);
+       pl = kzalloc(sizeof(*pl), flags);
        if (!pl)
                return -ENOMEM;
 
@@ -567,7 +568,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
        struct nd_poison *pl;
 
        if (list_empty(&nvdimm_bus->poison_list))
-               return add_poison(nvdimm_bus, addr, length);
+               return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
 
        /*
         * There is a chance this is a duplicate, check for those first.
@@ -587,7 +588,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
         * as any overlapping ranges will get resolved when the list is consumed
         * and converted to badblocks
         */
-       return add_poison(nvdimm_bus, addr, length);
+       return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
@@ -602,6 +603,70 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
 }
 EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
 
+void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+               phys_addr_t start, unsigned int len)
+{
+       struct list_head *poison_list = &nvdimm_bus->poison_list;
+       u64 clr_end = start + len - 1;
+       struct nd_poison *pl, *next;
+
+       nvdimm_bus_lock(&nvdimm_bus->dev);
+       WARN_ON_ONCE(list_empty(poison_list));
+
+       /*
+        * [start, clr_end] is the poison interval being cleared.
+        * [pl->start, pl_end] is the poison_list entry we're comparing
+        * the above interval against. The poison list entry may need
+        * to be modified (update either start or length), deleted, or
+        * split into two based on the overlap characteristics
+        */
+
+       list_for_each_entry_safe(pl, next, poison_list, list) {
+               u64 pl_end = pl->start + pl->length - 1;
+
+               /* Skip intervals with no intersection */
+               if (pl_end < start)
+                       continue;
+               if (pl->start >  clr_end)
+                       continue;
+               /* Delete completely overlapped poison entries */
+               if ((pl->start >= start) && (pl_end <= clr_end)) {
+                       list_del(&pl->list);
+                       kfree(pl);
+                       continue;
+               }
+               /* Adjust start point of partially cleared entries */
+               if ((start <= pl->start) && (clr_end > pl->start)) {
+                       pl->length -= clr_end - pl->start + 1;
+                       pl->start = clr_end + 1;
+                       continue;
+               }
+               /* Adjust pl->length for partial clearing at the tail end */
+               if ((pl->start < start) && (pl_end <= clr_end)) {
+                       /* pl->start remains the same */
+                       pl->length = start - pl->start;
+                       continue;
+               }
+               /*
+                * If clearing in the middle of an entry, we split it into
+                * two by modifying the current entry to represent one half of
+                * the split, and adding a new entry for the second half.
+                */
+               if ((pl->start < start) && (pl_end > clr_end)) {
+                       u64 new_start = clr_end + 1;
+                       u64 new_len = pl_end - new_start + 1;
+
+                       /* Add new entry covering the right half */
+                       add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO);
+                       /* Adjust this entry to cover the left half */
+                       pl->length = start - pl->start;
+                       continue;
+               }
+       }
+       nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list);
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
 {
index 71d12bb..619834e 100644 (file)
@@ -26,6 +26,14 @@ static int nvdimm_probe(struct device *dev)
        struct nvdimm_drvdata *ndd;
        int rc;
 
+       rc = nvdimm_check_config_data(dev);
+       if (rc) {
+               /* not required for non-aliased nvdimm, ex. NVDIMM-N */
+               if (rc == -ENOTTY)
+                       rc = 0;
+               return rc;
+       }
+
        ndd = kzalloc(sizeof(*ndd), GFP_KERNEL);
        if (!ndd)
                return -ENOMEM;
@@ -72,6 +80,9 @@ static int nvdimm_remove(struct device *dev)
 {
        struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
 
+       if (!ndd)
+               return 0;
+
        nvdimm_bus_lock(dev);
        dev_set_drvdata(dev, NULL);
        nvdimm_bus_unlock(dev);
index d9bba5e..d614493 100644 (file)
@@ -28,28 +28,30 @@ static DEFINE_IDA(dimm_ida);
  * Retrieve bus and dimm handle and return if this bus supports
  * get_config_data commands
  */
-static int __validate_dimm(struct nvdimm_drvdata *ndd)
+int nvdimm_check_config_data(struct device *dev)
 {
-       struct nvdimm *nvdimm;
-
-       if (!ndd)
-               return -EINVAL;
-
-       nvdimm = to_nvdimm(ndd->dev);
+       struct nvdimm *nvdimm = to_nvdimm(dev);
 
-       if (!nvdimm->cmd_mask)
-               return -ENXIO;
-       if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask))
-               return -ENXIO;
+       if (!nvdimm->cmd_mask ||
+           !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
+               if (nvdimm->flags & NDD_ALIASING)
+                       return -ENXIO;
+               else
+                       return -ENOTTY;
+       }
 
        return 0;
 }
 
 static int validate_dimm(struct nvdimm_drvdata *ndd)
 {
-       int rc = __validate_dimm(ndd);
+       int rc;
 
-       if (rc && ndd)
+       if (!ndd)
+               return -EINVAL;
+
+       rc = nvdimm_check_config_data(ndd->dev);
+       if (rc)
                dev_dbg(ndd->dev, "%pf: %s error: %d\n",
                                __builtin_return_address(0), __func__, rc);
        return rc;
@@ -263,6 +265,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm)
 }
 EXPORT_SYMBOL_GPL(nvdimm_name);
 
+struct kobject *nvdimm_kobj(struct nvdimm *nvdimm)
+{
+       return &nvdimm->dev.kobj;
+}
+EXPORT_SYMBOL_GPL(nvdimm_kobj);
+
 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm)
 {
        return nvdimm->cmd_mask;
@@ -378,40 +386,166 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 }
 EXPORT_SYMBOL_GPL(nvdimm_create);
 
+int alias_dpa_busy(struct device *dev, void *data)
+{
+       resource_size_t map_end, blk_start, new, busy;
+       struct blk_alloc_info *info = data;
+       struct nd_mapping *nd_mapping;
+       struct nd_region *nd_region;
+       struct nvdimm_drvdata *ndd;
+       struct resource *res;
+       int i;
+
+       if (!is_nd_pmem(dev))
+               return 0;
+
+       nd_region = to_nd_region(dev);
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               nd_mapping  = &nd_region->mapping[i];
+               if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
+                       break;
+       }
+
+       if (i >= nd_region->ndr_mappings)
+               return 0;
+
+       ndd = to_ndd(nd_mapping);
+       map_end = nd_mapping->start + nd_mapping->size - 1;
+       blk_start = nd_mapping->start;
+
+       /*
+        * In the allocation case ->res is set to free space that we are
+        * looking to validate against PMEM aliasing collision rules
+        * (i.e. BLK is allocated after all aliased PMEM).
+        */
+       if (info->res) {
+               if (info->res->start >= nd_mapping->start
+                               && info->res->start < map_end)
+                       /* pass */;
+               else
+                       return 0;
+       }
+
+ retry:
+       /*
+        * Find the free dpa from the end of the last pmem allocation to
+        * the end of the interleave-set mapping that is not already
+        * covered by a blk allocation.
+        */
+       busy = 0;
+       for_each_dpa_resource(ndd, res) {
+               if ((res->start >= blk_start && res->start < map_end)
+                               || (res->end >= blk_start
+                                       && res->end <= map_end)) {
+                       if (strncmp(res->name, "pmem", 4) == 0) {
+                               new = max(blk_start, min(map_end + 1,
+                                                       res->end + 1));
+                               if (new != blk_start) {
+                                       blk_start = new;
+                                       goto retry;
+                               }
+                       } else
+                               busy += min(map_end, res->end)
+                                       - max(nd_mapping->start, res->start) + 1;
+               } else if (nd_mapping->start > res->start
+                               && map_end < res->end) {
+                       /* total eclipse of the PMEM region mapping */
+                       busy += nd_mapping->size;
+                       break;
+               }
+       }
+
+       /* update the free space range with the probed blk_start */
+       if (info->res && blk_start > info->res->start) {
+               info->res->start = max(info->res->start, blk_start);
+               if (info->res->start > info->res->end)
+                       info->res->end = info->res->start - 1;
+               return 1;
+       }
+
+       info->available -= blk_start - nd_mapping->start + busy;
+
+       return 0;
+}
+
+static int blk_dpa_busy(struct device *dev, void *data)
+{
+       struct blk_alloc_info *info = data;
+       struct nd_mapping *nd_mapping;
+       struct nd_region *nd_region;
+       resource_size_t map_end;
+       int i;
+
+       if (!is_nd_pmem(dev))
+               return 0;
+
+       nd_region = to_nd_region(dev);
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               nd_mapping  = &nd_region->mapping[i];
+               if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
+                       break;
+       }
+
+       if (i >= nd_region->ndr_mappings)
+               return 0;
+
+       map_end = nd_mapping->start + nd_mapping->size - 1;
+       if (info->res->start >= nd_mapping->start
+                       && info->res->start < map_end) {
+               if (info->res->end <= map_end) {
+                       info->busy = 0;
+                       return 1;
+               } else {
+                       info->busy -= info->res->end - map_end;
+                       return 0;
+               }
+       } else if (info->res->end >= nd_mapping->start
+                       && info->res->end <= map_end) {
+               info->busy -= nd_mapping->start - info->res->start;
+               return 0;
+       } else {
+               info->busy -= nd_mapping->size;
+               return 0;
+       }
+}
+
 /**
  * nd_blk_available_dpa - account the unused dpa of BLK region
  * @nd_mapping: container of dpa-resource-root + labels
  *
- * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges.
+ * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
+ * we arrange for them to never start at an lower dpa than the last
+ * PMEM allocation in an aliased region.
  */
-resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping)
+resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
 {
+       struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+       struct nd_mapping *nd_mapping = &nd_region->mapping[0];
        struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-       resource_size_t map_end, busy = 0, available;
+       struct blk_alloc_info info = {
+               .nd_mapping = nd_mapping,
+               .available = nd_mapping->size,
+               .res = NULL,
+       };
        struct resource *res;
 
        if (!ndd)
                return 0;
 
-       map_end = nd_mapping->start + nd_mapping->size - 1;
-       for_each_dpa_resource(ndd, res)
-               if (res->start >= nd_mapping->start && res->start < map_end) {
-                       resource_size_t end = min(map_end, res->end);
+       device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
 
-                       busy += end - res->start + 1;
-               } else if (res->end >= nd_mapping->start
-                               && res->end <= map_end) {
-                       busy += res->end - nd_mapping->start;
-               } else if (nd_mapping->start > res->start
-                               && nd_mapping->start < res->end) {
-                       /* total eclipse of the BLK region mapping */
-                       busy += nd_mapping->size;
-               }
+       /* now account for busy blk allocations in unaliased dpa */
+       for_each_dpa_resource(ndd, res) {
+               if (strncmp(res->name, "blk", 3) != 0)
+                       continue;
 
-       available = map_end - nd_mapping->start + 1;
-       if (busy < available)
-               return available - busy;
-       return 0;
+               info.res = res;
+               info.busy = resource_size(res);
+               device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
+               info.available -= info.busy;
+       }
+
+       return info.available;
 }
 
 /**
@@ -443,21 +577,16 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
        map_start = nd_mapping->start;
        map_end = map_start + nd_mapping->size - 1;
        blk_start = max(map_start, map_end + 1 - *overlap);
-       for_each_dpa_resource(ndd, res)
+       for_each_dpa_resource(ndd, res) {
                if (res->start >= map_start && res->start < map_end) {
                        if (strncmp(res->name, "blk", 3) == 0)
-                               blk_start = min(blk_start, res->start);
-                       else if (res->start != map_start) {
+                               blk_start = min(blk_start,
+                                               max(map_start, res->start));
+                       else if (res->end > map_end) {
                                reason = "misaligned to iset";
                                goto err;
-                       } else {
-                               if (busy) {
-                                       reason = "duplicate overlapping PMEM reservations?";
-                                       goto err;
-                               }
+                       } else
                                busy += resource_size(res);
-                               continue;
-                       }
                } else if (res->end >= map_start && res->end <= map_end) {
                        if (strncmp(res->name, "blk", 3) == 0) {
                                /*
@@ -466,15 +595,14 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
                                 * be used for BLK.
                                 */
                                blk_start = map_start;
-                       } else {
-                               reason = "misaligned to iset";
-                               goto err;
-                       }
+                       } else
+                               busy += resource_size(res);
                } else if (map_start > res->start && map_start < res->end) {
                        /* total eclipse of the mapping */
                        busy += nd_mapping->size;
                        blk_start = map_start;
                }
+       }
 
        *overlap = map_end + 1 - blk_start;
        available = blk_start - map_start;
@@ -483,10 +611,6 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
        return 0;
 
  err:
-       /*
-        * Something is wrong, PMEM must align with the start of the
-        * interleave set, and there can only be one allocation per set.
-        */
        nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
        return 0;
 }
index 96526dc..fac7cab 100644 (file)
@@ -494,11 +494,13 @@ static int __pmem_label_update(struct nd_region *nd_region,
                struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
                int pos)
 {
-       u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize;
+       u64 cookie = nd_region_interleave_set_cookie(nd_region);
        struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-       struct nd_namespace_label *victim_label;
+       struct nd_label_ent *label_ent, *victim = NULL;
        struct nd_namespace_label *nd_label;
        struct nd_namespace_index *nsindex;
+       struct nd_label_id label_id;
+       struct resource *res;
        unsigned long *free;
        u32 nslot, slot;
        size_t offset;
@@ -507,6 +509,16 @@ static int __pmem_label_update(struct nd_region *nd_region,
        if (!preamble_next(ndd, &nsindex, &free, &nslot))
                return -ENXIO;
 
+       nd_label_gen_id(&label_id, nspm->uuid, 0);
+       for_each_dpa_resource(ndd, res)
+               if (strcmp(res->name, label_id.id) == 0)
+                       break;
+
+       if (!res) {
+               WARN_ON_ONCE(1);
+               return -ENXIO;
+       }
+
        /* allocate and write the label to the staging (next) index */
        slot = nd_label_alloc_slot(ndd);
        if (slot == UINT_MAX)
@@ -522,11 +534,10 @@ static int __pmem_label_update(struct nd_region *nd_region,
        nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
        nd_label->position = __cpu_to_le16(pos);
        nd_label->isetcookie = __cpu_to_le64(cookie);
-       rawsize = div_u64(resource_size(&nspm->nsio.res),
-                       nd_region->ndr_mappings);
-       nd_label->rawsize = __cpu_to_le64(rawsize);
-       nd_label->dpa = __cpu_to_le64(nd_mapping->start);
+       nd_label->rawsize = __cpu_to_le64(resource_size(res));
+       nd_label->dpa = __cpu_to_le64(res->start);
        nd_label->slot = __cpu_to_le32(slot);
+       nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
 
        /* update label */
        offset = nd_label_offset(ndd, nd_label);
@@ -536,38 +547,43 @@ static int __pmem_label_update(struct nd_region *nd_region,
                return rc;
 
        /* Garbage collect the previous label */
-       victim_label = nd_mapping->labels[0];
-       if (victim_label) {
-               slot = to_slot(ndd, victim_label);
-               nd_label_free_slot(ndd, slot);
+       mutex_lock(&nd_mapping->lock);
+       list_for_each_entry(label_ent, &nd_mapping->labels, list) {
+               if (!label_ent->label)
+                       continue;
+               if (memcmp(nspm->uuid, label_ent->label->uuid,
+                                       NSLABEL_UUID_LEN) != 0)
+                       continue;
+               victim = label_ent;
+               list_move_tail(&victim->list, &nd_mapping->labels);
+               break;
+       }
+       if (victim) {
                dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+               slot = to_slot(ndd, victim->label);
+               nd_label_free_slot(ndd, slot);
+               victim->label = NULL;
        }
 
        /* update index */
        rc = nd_label_write_index(ndd, ndd->ns_next,
                        nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
-       if (rc < 0)
-               return rc;
-
-       nd_mapping->labels[0] = nd_label;
-
-       return 0;
-}
-
-static void del_label(struct nd_mapping *nd_mapping, int l)
-{
-       struct nd_namespace_label *next_label, *nd_label;
-       struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-       unsigned int slot;
-       int j;
-
-       nd_label = nd_mapping->labels[l];
-       slot = to_slot(ndd, nd_label);
-       dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot);
+       if (rc == 0) {
+               list_for_each_entry(label_ent, &nd_mapping->labels, list)
+                       if (!label_ent->label) {
+                               label_ent->label = nd_label;
+                               nd_label = NULL;
+                               break;
+                       }
+               dev_WARN_ONCE(&nspm->nsio.common.dev, nd_label,
+                               "failed to track label: %d\n",
+                               to_slot(ndd, nd_label));
+               if (nd_label)
+                       rc = -ENXIO;
+       }
+       mutex_unlock(&nd_mapping->lock);
 
-       for (j = l; (next_label = nd_mapping->labels[j + 1]); j++)
-               nd_mapping->labels[j] = next_label;
-       nd_mapping->labels[j] = NULL;
+       return rc;
 }
 
 static bool is_old_resource(struct resource *res, struct resource **list, int n)
@@ -607,14 +623,16 @@ static int __blk_label_update(struct nd_region *nd_region,
                struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk,
                int num_labels)
 {
-       int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
+       int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
        struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
        struct nd_namespace_label *nd_label;
+       struct nd_label_ent *label_ent, *e;
        struct nd_namespace_index *nsindex;
        unsigned long *free, *victim_map = NULL;
        struct resource *res, **old_res_list;
        struct nd_label_id label_id;
        u8 uuid[NSLABEL_UUID_LEN];
+       LIST_HEAD(list);
        u32 nslot, slot;
 
        if (!preamble_next(ndd, &nsindex, &free, &nslot))
@@ -736,15 +754,22 @@ static int __blk_label_update(struct nd_region *nd_region,
         * entries in nd_mapping->labels
         */
        nlabel = 0;
-       for_each_label(l, nd_label, nd_mapping->labels) {
+       mutex_lock(&nd_mapping->lock);
+       list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+               nd_label = label_ent->label;
+               if (!nd_label)
+                       continue;
                nlabel++;
                memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
                if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
                        continue;
                nlabel--;
-               del_label(nd_mapping, l);
-               l--; /* retry with the new label at this index */
+               list_move(&label_ent->list, &list);
+               label_ent->label = NULL;
        }
+       list_splice_tail_init(&list, &nd_mapping->labels);
+       mutex_unlock(&nd_mapping->lock);
+
        if (nlabel + nsblk->num_resources > num_labels) {
                /*
                 * Bug, we can't end up with more resources than
@@ -755,6 +780,15 @@ static int __blk_label_update(struct nd_region *nd_region,
                goto out;
        }
 
+       mutex_lock(&nd_mapping->lock);
+       label_ent = list_first_entry_or_null(&nd_mapping->labels,
+                       typeof(*label_ent), list);
+       if (!label_ent) {
+               WARN_ON(1);
+               mutex_unlock(&nd_mapping->lock);
+               rc = -ENXIO;
+               goto out;
+       }
        for_each_clear_bit_le(slot, free, nslot) {
                nd_label = nd_label_base(ndd) + slot;
                memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
@@ -762,11 +796,19 @@ static int __blk_label_update(struct nd_region *nd_region,
                        continue;
                res = to_resource(ndd, nd_label);
                res->flags &= ~DPA_RESOURCE_ADJUSTED;
-               dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n",
-                               l, slot);
-               nd_mapping->labels[l++] = nd_label;
+               dev_vdbg(&nsblk->common.dev, "assign label slot: %d\n", slot);
+               list_for_each_entry_from(label_ent, &nd_mapping->labels, list) {
+                       if (label_ent->label)
+                               continue;
+                       label_ent->label = nd_label;
+                       nd_label = NULL;
+                       break;
+               }
+               if (nd_label)
+                       dev_WARN(&nsblk->common.dev,
+                                       "failed to track label slot%d\n", slot);
        }
-       nd_mapping->labels[l] = NULL;
+       mutex_unlock(&nd_mapping->lock);
 
  out:
        kfree(old_res_list);
@@ -788,32 +830,28 @@ static int __blk_label_update(struct nd_region *nd_region,
 
 static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
 {
-       int i, l, old_num_labels = 0;
+       int i, old_num_labels = 0;
+       struct nd_label_ent *label_ent;
        struct nd_namespace_index *nsindex;
-       struct nd_namespace_label *nd_label;
        struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-       size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *);
 
-       for_each_label(l, nd_label, nd_mapping->labels)
+       mutex_lock(&nd_mapping->lock);
+       list_for_each_entry(label_ent, &nd_mapping->labels, list)
                old_num_labels++;
+       mutex_unlock(&nd_mapping->lock);
 
        /*
         * We need to preserve all the old labels for the mapping so
         * they can be garbage collected after writing the new labels.
         */
-       if (num_labels > old_num_labels) {
-               struct nd_namespace_label **labels;
-
-               labels = krealloc(nd_mapping->labels, size, GFP_KERNEL);
-               if (!labels)
+       for (i = old_num_labels; i < num_labels; i++) {
+               label_ent = kzalloc(sizeof(*label_ent), GFP_KERNEL);
+               if (!label_ent)
                        return -ENOMEM;
-               nd_mapping->labels = labels;
+               mutex_lock(&nd_mapping->lock);
+               list_add_tail(&label_ent->list, &nd_mapping->labels);
+               mutex_unlock(&nd_mapping->lock);
        }
-       if (!nd_mapping->labels)
-               return -ENOMEM;
-
-       for (i = old_num_labels; i <= num_labels; i++)
-               nd_mapping->labels[i] = NULL;
 
        if (ndd->ns_current == -1 || ndd->ns_next == -1)
                /* pass */;
@@ -837,42 +875,45 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
 static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 {
        struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-       struct nd_namespace_label *nd_label;
+       struct nd_label_ent *label_ent, *e;
        struct nd_namespace_index *nsindex;
        u8 label_uuid[NSLABEL_UUID_LEN];
-       int l, num_freed = 0;
        unsigned long *free;
+       LIST_HEAD(list);
        u32 nslot, slot;
+       int active = 0;
 
        if (!uuid)
                return 0;
 
        /* no index || no labels == nothing to delete */
-       if (!preamble_next(ndd, &nsindex, &free, &nslot)
-                       || !nd_mapping->labels)
+       if (!preamble_next(ndd, &nsindex, &free, &nslot))
                return 0;
 
-       for_each_label(l, nd_label, nd_mapping->labels) {
+       mutex_lock(&nd_mapping->lock);
+       list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+               struct nd_namespace_label *nd_label = label_ent->label;
+
+               if (!nd_label)
+                       continue;
+               active++;
                memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
                if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0)
                        continue;
+               active--;
                slot = to_slot(ndd, nd_label);
                nd_label_free_slot(ndd, slot);
                dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
-               del_label(nd_mapping, l);
-               num_freed++;
-               l--; /* retry with new label at this index */
+               list_move_tail(&label_ent->list, &list);
+               label_ent->label = NULL;
        }
+       list_splice_tail_init(&list, &nd_mapping->labels);
 
-       if (num_freed > l) {
-               /*
-                * num_freed will only ever be > l when we delete the last
-                * label
-                */
-               kfree(nd_mapping->labels);
-               nd_mapping->labels = NULL;
-               dev_dbg(ndd->dev, "%s: no more labels\n", __func__);
+       if (active == 0) {
+               nd_mapping_free_labels(nd_mapping);
+               dev_dbg(ndd->dev, "%s: no more active labels\n", __func__);
        }
+       mutex_unlock(&nd_mapping->lock);
 
        return nd_label_write_index(ndd, ndd->ns_next,
                        nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
@@ -885,7 +926,9 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
 
        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-               int rc;
+               struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+               struct resource *res;
+               int rc, count = 0;
 
                if (size == 0) {
                        rc = del_labels(nd_mapping, nspm->uuid);
@@ -894,7 +937,12 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
                        continue;
                }
 
-               rc = init_labels(nd_mapping, 1);
+               for_each_dpa_resource(ndd, res)
+                       if (strncmp(res->name, "pmem", 3) == 0)
+                               count++;
+               WARN_ON_ONCE(!count);
+
+               rc = init_labels(nd_mapping, count);
                if (rc < 0)
                        return rc;
 
index c5e3196..3509cff 100644 (file)
  */
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/sort.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
+#include <linux/list.h>
 #include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"
@@ -28,7 +30,10 @@ static void namespace_io_release(struct device *dev)
 static void namespace_pmem_release(struct device *dev)
 {
        struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+       struct nd_region *nd_region = to_nd_region(dev->parent);
 
+       if (nspm->id >= 0)
+               ida_simple_remove(&nd_region->ns_ida, nspm->id);
        kfree(nspm->alt_name);
        kfree(nspm->uuid);
        kfree(nspm);
@@ -62,17 +67,17 @@ static struct device_type namespace_blk_device_type = {
        .release = namespace_blk_release,
 };
 
-static bool is_namespace_pmem(struct device *dev)
+static bool is_namespace_pmem(const struct device *dev)
 {
        return dev ? dev->type == &namespace_pmem_device_type : false;
 }
 
-static bool is_namespace_blk(struct device *dev)
+static bool is_namespace_blk(const struct device *dev)
 {
        return dev ? dev->type == &namespace_blk_device_type : false;
 }
 
-static bool is_namespace_io(struct device *dev)
+static bool is_namespace_io(const struct device *dev)
 {
        return dev ? dev->type == &namespace_io_device_type : false;
 }
@@ -168,7 +173,21 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
                suffix = "s";
 
        if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) {
-               sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : "");
+               int nsidx = 0;
+
+               if (is_namespace_pmem(&ndns->dev)) {
+                       struct nd_namespace_pmem *nspm;
+
+                       nspm = to_nd_namespace_pmem(&ndns->dev);
+                       nsidx = nspm->id;
+               }
+
+               if (nsidx)
+                       sprintf(name, "pmem%d.%d%s", nd_region->id, nsidx,
+                                       suffix ? suffix : "");
+               else
+                       sprintf(name, "pmem%d%s", nd_region->id,
+                                       suffix ? suffix : "");
        } else if (is_namespace_blk(&ndns->dev)) {
                struct nd_namespace_blk *nsblk;
 
@@ -294,7 +313,7 @@ static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
                if (strcmp(res->name, label_id.id) != 0)
                        continue;
                /*
-                * Resources with unacknoweldged adjustments indicate a
+                * Resources with unacknowledged adjustments indicate a
                 * failure to update labels
                 */
                if (res->flags & DPA_RESOURCE_ADJUSTED)
@@ -510,19 +529,68 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
        return rc ? n : 0;
 }
 
-static bool space_valid(bool is_pmem, bool is_reserve,
-               struct nd_label_id *label_id, struct resource *res)
+
+/**
+ * space_valid() - validate free dpa space against constraints
+ * @nd_region: hosting region of the free space
+ * @ndd: dimm device data for debug
+ * @label_id: namespace id to allocate space
+ * @prev: potential allocation that precedes free space
+ * @next: allocation that follows the given free space range
+ * @exist: first allocation with same id in the mapping
+ * @n: range that must satisfied for pmem allocations
+ * @valid: free space range to validate
+ *
+ * BLK-space is valid as long as it does not precede a PMEM
+ * allocation in a given region. PMEM-space must be contiguous
+ * and adjacent to an existing existing allocation (if one
+ * exists).  If reserving PMEM any space is valid.
+ */
+static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
+               struct nd_label_id *label_id, struct resource *prev,
+               struct resource *next, struct resource *exist,
+               resource_size_t n, struct resource *valid)
 {
-       /*
-        * For BLK-space any space is valid, for PMEM-space, it must be
-        * contiguous with an existing allocation unless we are
-        * reserving pmem.
-        */
-       if (is_reserve || !is_pmem)
-               return true;
-       if (!res || strcmp(res->name, label_id->id) == 0)
-               return true;
-       return false;
+       bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
+       bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+
+       if (valid->start >= valid->end)
+               goto invalid;
+
+       if (is_reserve)
+               return;
+
+       if (!is_pmem) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+               struct nvdimm_bus *nvdimm_bus;
+               struct blk_alloc_info info = {
+                       .nd_mapping = nd_mapping,
+                       .available = nd_mapping->size,
+                       .res = valid,
+               };
+
+               WARN_ON(!is_nd_blk(&nd_region->dev));
+               nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+               device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
+               return;
+       }
+
+       /* allocation needs to be contiguous, so this is all or nothing */
+       if (resource_size(valid) < n)
+               goto invalid;
+
+       /* we've got all the space we need and no existing allocation */
+       if (!exist)
+               return;
+
+       /* allocation needs to be contiguous with the existing namespace */
+       if (valid->start == exist->end + 1
+                       || valid->end == exist->start - 1)
+               return;
+
+ invalid:
+       /* truncate @valid size to 0 */
+       valid->end = valid->start - 1;
 }
 
 enum alloc_loc {
@@ -534,18 +602,24 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
                resource_size_t n)
 {
        resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
-       bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
        bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
        struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+       struct resource *res, *exist = NULL, valid;
        const resource_size_t to_allocate = n;
-       struct resource *res;
        int first;
 
+       for_each_dpa_resource(ndd, res)
+               if (strcmp(label_id->id, res->name) == 0)
+                       exist = res;
+
+       valid.start = nd_mapping->start;
+       valid.end = mapping_end;
+       valid.name = "free space";
  retry:
        first = 0;
        for_each_dpa_resource(ndd, res) {
-               resource_size_t allocate, available = 0, free_start, free_end;
                struct resource *next = res->sibling, *new_res = NULL;
+               resource_size_t allocate, available = 0;
                enum alloc_loc loc = ALLOC_ERR;
                const char *action;
                int rc = 0;
@@ -558,32 +632,35 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
 
                /* space at the beginning of the mapping */
                if (!first++ && res->start > nd_mapping->start) {
-                       free_start = nd_mapping->start;
-                       available = res->start - free_start;
-                       if (space_valid(is_pmem, is_reserve, label_id, NULL))
+                       valid.start = nd_mapping->start;
+                       valid.end = res->start - 1;
+                       space_valid(nd_region, ndd, label_id, NULL, next, exist,
+                                       to_allocate, &valid);
+                       available = resource_size(&valid);
+                       if (available)
                                loc = ALLOC_BEFORE;
                }
 
                /* space between allocations */
                if (!loc && next) {
-                       free_start = res->start + resource_size(res);
-                       free_end = min(mapping_end, next->start - 1);
-                       if (space_valid(is_pmem, is_reserve, label_id, res)
-                                       && free_start < free_end) {
-                               available = free_end + 1 - free_start;
+                       valid.start = res->start + resource_size(res);
+                       valid.end = min(mapping_end, next->start - 1);
+                       space_valid(nd_region, ndd, label_id, res, next, exist,
+                                       to_allocate, &valid);
+                       available = resource_size(&valid);
+                       if (available)
                                loc = ALLOC_MID;
-                       }
                }
 
                /* space at the end of the mapping */
                if (!loc && !next) {
-                       free_start = res->start + resource_size(res);
-                       free_end = mapping_end;
-                       if (space_valid(is_pmem, is_reserve, label_id, res)
-                                       && free_start < free_end) {
-                               available = free_end + 1 - free_start;
+                       valid.start = res->start + resource_size(res);
+                       valid.end = mapping_end;
+                       space_valid(nd_region, ndd, label_id, res, next, exist,
+                                       to_allocate, &valid);
+                       available = resource_size(&valid);
+                       if (available)
                                loc = ALLOC_AFTER;
-                       }
                }
 
                if (!loc || !available)
@@ -593,8 +670,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
                case ALLOC_BEFORE:
                        if (strcmp(res->name, label_id->id) == 0) {
                                /* adjust current resource up */
-                               if (is_pmem && !is_reserve)
-                                       return n;
                                rc = adjust_resource(res, res->start - allocate,
                                                resource_size(res) + allocate);
                                action = "cur grow up";
@@ -604,8 +679,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
                case ALLOC_MID:
                        if (strcmp(next->name, label_id->id) == 0) {
                                /* adjust next resource up */
-                               if (is_pmem && !is_reserve)
-                                       return n;
                                rc = adjust_resource(next, next->start
                                                - allocate, resource_size(next)
                                                + allocate);
@@ -629,12 +702,10 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
                if (strcmp(action, "allocate") == 0) {
                        /* BLK allocate bottom up */
                        if (!is_pmem)
-                               free_start += available - allocate;
-                       else if (!is_reserve && free_start != nd_mapping->start)
-                               return n;
+                               valid.start += available - allocate;
 
                        new_res = nvdimm_allocate_dpa(ndd, label_id,
-                                       free_start, allocate);
+                                       valid.start, allocate);
                        if (!new_res)
                                rc = -EBUSY;
                } else if (strcmp(action, "grow down") == 0) {
@@ -832,13 +903,45 @@ static int grow_dpa_allocation(struct nd_region *nd_region,
        return 0;
 }
 
-static void nd_namespace_pmem_set_size(struct nd_region *nd_region,
+static void nd_namespace_pmem_set_resource(struct nd_region *nd_region,
                struct nd_namespace_pmem *nspm, resource_size_t size)
 {
        struct resource *res = &nspm->nsio.res;
+       resource_size_t offset = 0;
 
-       res->start = nd_region->ndr_start;
-       res->end = nd_region->ndr_start + size - 1;
+       if (size && !nspm->uuid) {
+               WARN_ON_ONCE(1);
+               size = 0;
+       }
+
+       if (size && nspm->uuid) {
+               struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+               struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+               struct nd_label_id label_id;
+               struct resource *res;
+
+               if (!ndd) {
+                       size = 0;
+                       goto out;
+               }
+
+               nd_label_gen_id(&label_id, nspm->uuid, 0);
+
+               /* calculate a spa offset from the dpa allocation offset */
+               for_each_dpa_resource(ndd, res)
+                       if (strcmp(res->name, label_id.id) == 0) {
+                               offset = (res->start - nd_mapping->start)
+                                       * nd_region->ndr_mappings;
+                               goto out;
+                       }
+
+               WARN_ON_ONCE(1);
+               size = 0;
+       }
+
+ out:
+       res->start = nd_region->ndr_start + offset;
+       res->end = res->start + size - 1;
 }
 
 static bool uuid_not_set(const u8 *uuid, struct device *dev, const char *where)
@@ -929,7 +1032,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
        if (is_namespace_pmem(dev)) {
                struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
 
-               nd_namespace_pmem_set_size(nd_region, nspm,
+               nd_namespace_pmem_set_resource(nd_region, nspm,
                                val * nd_region->ndr_mappings);
        } else if (is_namespace_blk(dev)) {
                struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
@@ -1031,22 +1134,27 @@ static ssize_t size_show(struct device *dev,
 }
 static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);
 
-static ssize_t uuid_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static u8 *namespace_to_uuid(struct device *dev)
 {
-       u8 *uuid;
-
        if (is_namespace_pmem(dev)) {
                struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
 
-               uuid = nspm->uuid;
+               return nspm->uuid;
        } else if (is_namespace_blk(dev)) {
                struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
 
-               uuid = nsblk->uuid;
+               return nsblk->uuid;
        } else
-               return -ENXIO;
+               return ERR_PTR(-ENXIO);
+}
 
+static ssize_t uuid_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       u8 *uuid = namespace_to_uuid(dev);
+
+       if (IS_ERR(uuid))
+               return PTR_ERR(uuid);
        if (uuid)
                return sprintf(buf, "%pUb\n", uuid);
        return sprintf(buf, "\n");
@@ -1089,7 +1197,7 @@ static int namespace_update_uuid(struct nd_region *nd_region,
                 *
                 * FIXME: can we delete uuid with zero dpa allocated?
                 */
-               if (nd_mapping->labels)
+               if (list_empty(&nd_mapping->labels))
                        return -EBUSY;
        }
 
@@ -1491,14 +1599,19 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 
        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-               struct nd_namespace_label *nd_label;
+               struct nd_label_ent *label_ent;
                bool found_uuid = false;
-               int l;
 
-               for_each_label(l, nd_label, nd_mapping->labels) {
-                       u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
-                       u16 position = __le16_to_cpu(nd_label->position);
-                       u16 nlabel = __le16_to_cpu(nd_label->nlabel);
+               list_for_each_entry(label_ent, &nd_mapping->labels, list) {
+                       struct nd_namespace_label *nd_label = label_ent->label;
+                       u16 position, nlabel;
+                       u64 isetcookie;
+
+                       if (!nd_label)
+                               continue;
+                       isetcookie = __le64_to_cpu(nd_label->isetcookie);
+                       position = __le16_to_cpu(nd_label->position);
+                       nlabel = __le16_to_cpu(nd_label->nlabel);
 
                        if (isetcookie != cookie)
                                continue;
@@ -1528,7 +1641,6 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 
 static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
 {
-       struct nd_namespace_label *select = NULL;
        int i;
 
        if (!pmem_id)
@@ -1536,90 +1648,106 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
 
        for (i = 0; i < nd_region->ndr_mappings; i++) {
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-               struct nd_namespace_label *nd_label;
+               struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+               struct nd_namespace_label *nd_label = NULL;
                u64 hw_start, hw_end, pmem_start, pmem_end;
-               int l;
+               struct nd_label_ent *label_ent;
 
-               for_each_label(l, nd_label, nd_mapping->labels)
+               WARN_ON(!mutex_is_locked(&nd_mapping->lock));
+               list_for_each_entry(label_ent, &nd_mapping->labels, list) {
+                       nd_label = label_ent->label;
+                       if (!nd_label)
+                               continue;
                        if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0)
                                break;
+                       nd_label = NULL;
+               }
 
                if (!nd_label) {
                        WARN_ON(1);
                        return -EINVAL;
                }
 
-               select = nd_label;
                /*
                 * Check that this label is compliant with the dpa
                 * range published in NFIT
                 */
                hw_start = nd_mapping->start;
                hw_end = hw_start + nd_mapping->size;
-               pmem_start = __le64_to_cpu(select->dpa);
-               pmem_end = pmem_start + __le64_to_cpu(select->rawsize);
-               if (pmem_start == hw_start && pmem_end <= hw_end)
+               pmem_start = __le64_to_cpu(nd_label->dpa);
+               pmem_end = pmem_start + __le64_to_cpu(nd_label->rawsize);
+               if (pmem_start >= hw_start && pmem_start < hw_end
+                               && pmem_end <= hw_end && pmem_end > hw_start)
                        /* pass */;
-               else
+               else {
+                       dev_dbg(&nd_region->dev, "%s invalid label for %pUb\n",
+                                       dev_name(ndd->dev), nd_label->uuid);
                        return -EINVAL;
+               }
 
-               nd_mapping->labels[0] = select;
-               nd_mapping->labels[1] = NULL;
+               /* move recently validated label to the front of the list */
+               list_move(&label_ent->list, &nd_mapping->labels);
        }
        return 0;
 }
 
 /**
- * find_pmem_label_set - validate interleave set labelling, retrieve label0
+ * create_namespace_pmem - validate interleave set labelling, retrieve label0
  * @nd_region: region with mappings to validate
+ * @nspm: target namespace to create
+ * @nd_label: target pmem namespace label to evaluate
  */
-static int find_pmem_label_set(struct nd_region *nd_region,
-               struct nd_namespace_pmem *nspm)
+struct device *create_namespace_pmem(struct nd_region *nd_region,
+               struct nd_namespace_label *nd_label)
 {
        u64 cookie = nd_region_interleave_set_cookie(nd_region);
-       struct nd_namespace_label *nd_label;
-       u8 select_id[NSLABEL_UUID_LEN];
+       struct nd_label_ent *label_ent;
+       struct nd_namespace_pmem *nspm;
+       struct nd_mapping *nd_mapping;
        resource_size_t size = 0;
-       u8 *pmem_id = NULL;
-       int rc = -ENODEV, l;
+       struct resource *res;
+       struct device *dev;
+       int rc = 0;
        u16 i;
 
-       if (cookie == 0)
-               return -ENXIO;
+       if (cookie == 0) {
+               dev_dbg(&nd_region->dev, "invalid interleave-set-cookie\n");
+               return ERR_PTR(-ENXIO);
+       }
 
-       /*
-        * Find a complete set of labels by uuid.  By definition we can start
-        * with any mapping as the reference label
-        */
-       for_each_label(l, nd_label, nd_region->mapping[0].labels) {
-               u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
+       if (__le64_to_cpu(nd_label->isetcookie) != cookie) {
+               dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
+                               nd_label->uuid);
+               return ERR_PTR(-EAGAIN);
+       }
 
-               if (isetcookie != cookie)
-                       continue;
+       nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+       if (!nspm)
+               return ERR_PTR(-ENOMEM);
 
-               for (i = 0; nd_region->ndr_mappings; i++)
-                       if (!has_uuid_at_pos(nd_region, nd_label->uuid,
-                                               cookie, i))
-                               break;
-               if (i < nd_region->ndr_mappings) {
-                       /*
-                        * Give up if we don't find an instance of a
-                        * uuid at each position (from 0 to
-                        * nd_region->ndr_mappings - 1), or if we find a
-                        * dimm with two instances of the same uuid.
-                        */
-                       rc = -EINVAL;
-                       goto err;
-               } else if (pmem_id) {
-                       /*
-                        * If there is more than one valid uuid set, we
-                        * need userspace to clean this up.
-                        */
-                       rc = -EBUSY;
-                       goto err;
-               }
-               memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN);
-               pmem_id = select_id;
+       nspm->id = -1;
+       dev = &nspm->nsio.common.dev;
+       dev->type = &namespace_pmem_device_type;
+       dev->parent = &nd_region->dev;
+       res = &nspm->nsio.res;
+       res->name = dev_name(&nd_region->dev);
+       res->flags = IORESOURCE_MEM;
+
+       for (i = 0; i < nd_region->ndr_mappings; i++)
+               if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
+                       break;
+       if (i < nd_region->ndr_mappings) {
+               struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+
+               /*
+                * Give up if we don't find an instance of a uuid at each
+                * position (from 0 to nd_region->ndr_mappings - 1), or if we
+                * find a dimm with two instances of the same uuid.
+                */
+               dev_err(&nd_region->dev, "%s missing label for %pUb\n",
+                               dev_name(ndd->dev), nd_label->uuid);
+               rc = -EINVAL;
+               goto err;
        }
 
        /*
@@ -1630,14 +1758,23 @@ static int find_pmem_label_set(struct nd_region *nd_region,
         * the dimm being enabled (i.e. nd_label_reserve_dpa()
         * succeeded).
         */
-       rc = select_pmem_id(nd_region, pmem_id);
+       rc = select_pmem_id(nd_region, nd_label->uuid);
        if (rc)
                goto err;
 
        /* Calculate total size and populate namespace properties from label0 */
        for (i = 0; i < nd_region->ndr_mappings; i++) {
-               struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-               struct nd_namespace_label *label0 = nd_mapping->labels[0];
+               struct nd_namespace_label *label0;
+
+               nd_mapping = &nd_region->mapping[i];
+               label_ent = list_first_entry_or_null(&nd_mapping->labels,
+                               typeof(*label_ent), list);
+               label0 = label_ent ? label_ent->label : 0;
+
+               if (!label0) {
+                       WARN_ON(1);
+                       continue;
+               }
 
                size += __le64_to_cpu(label0->rawsize);
                if (__le16_to_cpu(label0->position) != 0)
@@ -1654,10 +1791,11 @@ static int find_pmem_label_set(struct nd_region *nd_region,
                goto err;
        }
 
-       nd_namespace_pmem_set_size(nd_region, nspm, size);
+       nd_namespace_pmem_set_resource(nd_region, nspm, size);
 
-       return 0;
+       return dev;
  err:
+       namespace_pmem_release(dev);
        switch (rc) {
        case -EINVAL:
                dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
@@ -1670,55 +1808,7 @@ static int find_pmem_label_set(struct nd_region *nd_region,
                                __func__, rc);
                break;
        }
-       return rc;
-}
-
-static struct device **create_namespace_pmem(struct nd_region *nd_region)
-{
-       struct nd_namespace_pmem *nspm;
-       struct device *dev, **devs;
-       struct resource *res;
-       int rc;
-
-       nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
-       if (!nspm)
-               return NULL;
-
-       dev = &nspm->nsio.common.dev;
-       dev->type = &namespace_pmem_device_type;
-       dev->parent = &nd_region->dev;
-       res = &nspm->nsio.res;
-       res->name = dev_name(&nd_region->dev);
-       res->flags = IORESOURCE_MEM;
-       rc = find_pmem_label_set(nd_region, nspm);
-       if (rc == -ENODEV) {
-               int i;
-
-               /* Pass, try to permit namespace creation... */
-               for (i = 0; i < nd_region->ndr_mappings; i++) {
-                       struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-
-                       kfree(nd_mapping->labels);
-                       nd_mapping->labels = NULL;
-               }
-
-               /* Publish a zero-sized namespace for userspace to configure. */
-               nd_namespace_pmem_set_size(nd_region, nspm, 0);
-
-               rc = 0;
-       } else if (rc)
-               goto err;
-
-       devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
-       if (!devs)
-               goto err;
-
-       devs[0] = dev;
-       return devs;
-
- err:
-       namespace_pmem_release(&nspm->nsio.common.dev);
-       return NULL;
+       return ERR_PTR(rc);
 }
 
 struct resource *nsblk_add_resource(struct nd_region *nd_region,
@@ -1770,16 +1860,58 @@ static struct device *nd_namespace_blk_create(struct nd_region *nd_region)
        return &nsblk->common.dev;
 }
 
-void nd_region_create_blk_seed(struct nd_region *nd_region)
+static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
+{
+       struct nd_namespace_pmem *nspm;
+       struct resource *res;
+       struct device *dev;
+
+       if (!is_nd_pmem(&nd_region->dev))
+               return NULL;
+
+       nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+       if (!nspm)
+               return NULL;
+
+       dev = &nspm->nsio.common.dev;
+       dev->type = &namespace_pmem_device_type;
+       dev->parent = &nd_region->dev;
+       res = &nspm->nsio.res;
+       res->name = dev_name(&nd_region->dev);
+       res->flags = IORESOURCE_MEM;
+
+       nspm->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL);
+       if (nspm->id < 0) {
+               kfree(nspm);
+               return NULL;
+       }
+       dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id);
+       dev->parent = &nd_region->dev;
+       dev->groups = nd_namespace_attribute_groups;
+       nd_namespace_pmem_set_resource(nd_region, nspm, 0);
+
+       return dev;
+}
+
+void nd_region_create_ns_seed(struct nd_region *nd_region)
 {
        WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
-       nd_region->ns_seed = nd_namespace_blk_create(nd_region);
+
+       if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_IO)
+               return;
+
+       if (is_nd_blk(&nd_region->dev))
+               nd_region->ns_seed = nd_namespace_blk_create(nd_region);
+       else
+               nd_region->ns_seed = nd_namespace_pmem_create(nd_region);
+
        /*
         * Seed creation failures are not fatal, provisioning is simply
         * disabled until memory becomes available
         */
        if (!nd_region->ns_seed)
-               dev_err(&nd_region->dev, "failed to create blk namespace\n");
+               dev_err(&nd_region->dev, "failed to create %s namespace\n",
+                               is_nd_blk(&nd_region->dev) ? "blk" : "pmem");
        else
                nd_device_register(nd_region->ns_seed);
 }
@@ -1820,43 +1952,137 @@ void nd_region_create_btt_seed(struct nd_region *nd_region)
                dev_err(&nd_region->dev, "failed to create btt namespace\n");
 }
 
-static struct device **create_namespace_blk(struct nd_region *nd_region)
+static int add_namespace_resource(struct nd_region *nd_region,
+               struct nd_namespace_label *nd_label, struct device **devs,
+               int count)
 {
        struct nd_mapping *nd_mapping = &nd_region->mapping[0];
-       struct nd_namespace_label *nd_label;
-       struct device *dev, **devs = NULL;
+       struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+       int i;
+
+       for (i = 0; i < count; i++) {
+               u8 *uuid = namespace_to_uuid(devs[i]);
+               struct resource *res;
+
+               if (IS_ERR_OR_NULL(uuid)) {
+                       WARN_ON(1);
+                       continue;
+               }
+
+               if (memcmp(uuid, nd_label->uuid, NSLABEL_UUID_LEN) != 0)
+                       continue;
+               if (is_namespace_blk(devs[i])) {
+                       res = nsblk_add_resource(nd_region, ndd,
+                                       to_nd_namespace_blk(devs[i]),
+                                       __le64_to_cpu(nd_label->dpa));
+                       if (!res)
+                               return -ENXIO;
+                       nd_dbg_dpa(nd_region, ndd, res, "%d assign\n", count);
+               } else {
+                       dev_err(&nd_region->dev,
+                                       "error: conflicting extents for uuid: %pUb\n",
+                                       nd_label->uuid);
+                       return -ENXIO;
+               }
+               break;
+       }
+
+       return i;
+}
+
+struct device *create_namespace_blk(struct nd_region *nd_region,
+               struct nd_namespace_label *nd_label, int count)
+{
+
+       struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+       struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
        struct nd_namespace_blk *nsblk;
-       struct nvdimm_drvdata *ndd;
-       int i, l, count = 0;
+       char *name[NSLABEL_NAME_LEN];
+       struct device *dev = NULL;
        struct resource *res;
 
-       if (nd_region->ndr_mappings == 0)
-               return NULL;
+       nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+       if (!nsblk)
+               return ERR_PTR(-ENOMEM);
+       dev = &nsblk->common.dev;
+       dev->type = &namespace_blk_device_type;
+       dev->parent = &nd_region->dev;
+       nsblk->id = -1;
+       nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
+       nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
+                       GFP_KERNEL);
+       if (!nsblk->uuid)
+               goto blk_err;
+       memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
+       if (name[0])
+               nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
+                               GFP_KERNEL);
+       res = nsblk_add_resource(nd_region, ndd, nsblk,
+                       __le64_to_cpu(nd_label->dpa));
+       if (!res)
+               goto blk_err;
+       nd_dbg_dpa(nd_region, ndd, res, "%d: assign\n", count);
+       return dev;
+ blk_err:
+       namespace_blk_release(dev);
+       return ERR_PTR(-ENXIO);
+}
+
+static int cmp_dpa(const void *a, const void *b)
+{
+       const struct device *dev_a = *(const struct device **) a;
+       const struct device *dev_b = *(const struct device **) b;
+       struct nd_namespace_blk *nsblk_a, *nsblk_b;
+       struct nd_namespace_pmem *nspm_a, *nspm_b;
+
+       if (is_namespace_io(dev_a))
+               return 0;
+
+       if (is_namespace_blk(dev_a)) {
+               nsblk_a = to_nd_namespace_blk(dev_a);
+               nsblk_b = to_nd_namespace_blk(dev_b);
+
+               return memcmp(&nsblk_a->res[0]->start, &nsblk_b->res[0]->start,
+                               sizeof(resource_size_t));
+       }
+
+       nspm_a = to_nd_namespace_pmem(dev_a);
+       nspm_b = to_nd_namespace_pmem(dev_b);
+
+       return memcmp(&nspm_a->nsio.res.start, &nspm_b->nsio.res.start,
+                       sizeof(resource_size_t));
+}
 
-       ndd = to_ndd(nd_mapping);
-       for_each_label(l, nd_label, nd_mapping->labels) {
-               u32 flags = __le32_to_cpu(nd_label->flags);
-               char *name[NSLABEL_NAME_LEN];
+static struct device **scan_labels(struct nd_region *nd_region)
+{
+       int i, count = 0;
+       struct device *dev, **devs = NULL;
+       struct nd_label_ent *label_ent, *e;
+       struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+       resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1;
+
+       /* "safe" because create_namespace_pmem() might list_move() label_ent */
+       list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+               struct nd_namespace_label *nd_label = label_ent->label;
                struct device **__devs;
+               u32 flags;
 
-               if (flags & NSLABEL_FLAG_LOCAL)
-                       /* pass */;
+               if (!nd_label)
+                       continue;
+               flags = __le32_to_cpu(nd_label->flags);
+               if (is_nd_blk(&nd_region->dev)
+                               == !!(flags & NSLABEL_FLAG_LOCAL))
+                       /* pass, region matches label type */;
                else
                        continue;
 
-               for (i = 0; i < count; i++) {
-                       nsblk = to_nd_namespace_blk(devs[i]);
-                       if (memcmp(nsblk->uuid, nd_label->uuid,
-                                               NSLABEL_UUID_LEN) == 0) {
-                               res = nsblk_add_resource(nd_region, ndd, nsblk,
-                                               __le64_to_cpu(nd_label->dpa));
-                               if (!res)
-                                       goto err;
-                               nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
-                                       dev_name(&nsblk->common.dev));
-                               break;
-                       }
-               }
+               /* skip labels that describe extents outside of the region */
+               if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end)
+                       continue;
+
+               i = add_namespace_resource(nd_region, nd_label, devs, count);
+               if (i < 0)
+                       goto err;
                if (i < count)
                        continue;
                __devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL);
@@ -1866,67 +2092,126 @@ static struct device **create_namespace_blk(struct nd_region *nd_region)
                kfree(devs);
                devs = __devs;
 
-               nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
-               if (!nsblk)
-                       goto err;
-               dev = &nsblk->common.dev;
-               dev->type = &namespace_blk_device_type;
-               dev->parent = &nd_region->dev;
-               dev_set_name(dev, "namespace%d.%d", nd_region->id, count);
-               devs[count++] = dev;
-               nsblk->id = -1;
-               nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
-               nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
-                               GFP_KERNEL);
-               if (!nsblk->uuid)
-                       goto err;
-               memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
-               if (name[0])
-                       nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
-                                       GFP_KERNEL);
-               res = nsblk_add_resource(nd_region, ndd, nsblk,
-                               __le64_to_cpu(nd_label->dpa));
-               if (!res)
-                       goto err;
-               nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
-                               dev_name(&nsblk->common.dev));
+               if (is_nd_blk(&nd_region->dev)) {
+                       dev = create_namespace_blk(nd_region, nd_label, count);
+                       if (IS_ERR(dev))
+                               goto err;
+                       devs[count++] = dev;
+               } else {
+                       dev = create_namespace_pmem(nd_region, nd_label);
+                       if (IS_ERR(dev)) {
+                               switch (PTR_ERR(dev)) {
+                               case -EAGAIN:
+                                       /* skip invalid labels */
+                                       continue;
+                               case -ENODEV:
+                                       /* fallthrough to seed creation */
+                                       break;
+                               default:
+                                       goto err;
+                               }
+                       } else
+                               devs[count++] = dev;
+               }
        }
 
-       dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n",
-                       __func__, count, count == 1 ? "" : "s");
+       dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
+                       __func__, count, is_nd_blk(&nd_region->dev)
+                       ? "blk" : "pmem", count == 1 ? "" : "s");
 
        if (count == 0) {
                /* Publish a zero-sized namespace for userspace to configure. */
-               for (i = 0; i < nd_region->ndr_mappings; i++) {
-                       struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-
-                       kfree(nd_mapping->labels);
-                       nd_mapping->labels = NULL;
-               }
+               nd_mapping_free_labels(nd_mapping);
 
                devs = kcalloc(2, sizeof(dev), GFP_KERNEL);
                if (!devs)
                        goto err;
-               nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
-               if (!nsblk)
-                       goto err;
-               dev = &nsblk->common.dev;
-               dev->type = &namespace_blk_device_type;
+               if (is_nd_blk(&nd_region->dev)) {
+                       struct nd_namespace_blk *nsblk;
+
+                       nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+                       if (!nsblk)
+                               goto err;
+                       dev = &nsblk->common.dev;
+                       dev->type = &namespace_blk_device_type;
+               } else {
+                       struct nd_namespace_pmem *nspm;
+
+                       nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+                       if (!nspm)
+                               goto err;
+                       dev = &nspm->nsio.common.dev;
+                       dev->type = &namespace_pmem_device_type;
+                       nd_namespace_pmem_set_resource(nd_region, nspm, 0);
+               }
                dev->parent = &nd_region->dev;
                devs[count++] = dev;
+       } else if (is_nd_pmem(&nd_region->dev)) {
+               /* clean unselected labels */
+               for (i = 0; i < nd_region->ndr_mappings; i++) {
+                       struct list_head *l, *e;
+                       LIST_HEAD(list);
+                       int j;
+
+                       nd_mapping = &nd_region->mapping[i];
+                       if (list_empty(&nd_mapping->labels)) {
+                               WARN_ON(1);
+                               continue;
+                       }
+
+                       j = count;
+                       list_for_each_safe(l, e, &nd_mapping->labels) {
+                               if (!j--)
+                                       break;
+                               list_move_tail(l, &list);
+                       }
+                       nd_mapping_free_labels(nd_mapping);
+                       list_splice_init(&list, &nd_mapping->labels);
+               }
        }
 
+       if (count > 1)
+               sort(devs, count, sizeof(struct device *), cmp_dpa, NULL);
+
        return devs;
 
-err:
-       for (i = 0; i < count; i++) {
-               nsblk = to_nd_namespace_blk(devs[i]);
-               namespace_blk_release(&nsblk->common.dev);
-       }
+ err:
+       for (i = 0; devs[i]; i++)
+               if (is_nd_blk(&nd_region->dev))
+                       namespace_blk_release(devs[i]);
+               else
+                       namespace_pmem_release(devs[i]);
        kfree(devs);
        return NULL;
 }
 
+static struct device **create_namespaces(struct nd_region *nd_region)
+{
+       struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+       struct device **devs;
+       int i;
+
+       if (nd_region->ndr_mappings == 0)
+               return NULL;
+
+       /* lock down all mappings while we scan labels */
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               nd_mapping = &nd_region->mapping[i];
+               mutex_lock_nested(&nd_mapping->lock, i);
+       }
+
+       devs = scan_labels(nd_region);
+
+       for (i = 0; i < nd_region->ndr_mappings; i++) {
+               int reverse = nd_region->ndr_mappings - 1 - i;
+
+               nd_mapping = &nd_region->mapping[reverse];
+               mutex_unlock(&nd_mapping->lock);
+       }
+
+       return devs;
+}
+
 static int init_active_labels(struct nd_region *nd_region)
 {
        int i;
@@ -1935,6 +2220,7 @@ static int init_active_labels(struct nd_region *nd_region)
                struct nd_mapping *nd_mapping = &nd_region->mapping[i];
                struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
                struct nvdimm *nvdimm = nd_mapping->nvdimm;
+               struct nd_label_ent *label_ent;
                int count, j;
 
                /*
@@ -1956,16 +2242,27 @@ static int init_active_labels(struct nd_region *nd_region)
                dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
                if (!count)
                        continue;
-               nd_mapping->labels = kcalloc(count + 1, sizeof(void *),
-                               GFP_KERNEL);
-               if (!nd_mapping->labels)
-                       return -ENOMEM;
                for (j = 0; j < count; j++) {
                        struct nd_namespace_label *label;
 
+                       label_ent = kzalloc(sizeof(*label_ent), GFP_KERNEL);
+                       if (!label_ent)
+                               break;
                        label = nd_label_active(ndd, j);
-                       nd_mapping->labels[j] = label;
+                       label_ent->label = label;
+
+                       mutex_lock(&nd_mapping->lock);
+                       list_add_tail(&label_ent->list, &nd_mapping->labels);
+                       mutex_unlock(&nd_mapping->lock);
                }
+
+               if (j >= count)
+                       continue;
+
+               mutex_lock(&nd_mapping->lock);
+               nd_mapping_free_labels(nd_mapping);
+               mutex_unlock(&nd_mapping->lock);
+               return -ENOMEM;
        }
 
        return 0;
@@ -1990,10 +2287,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
                devs = create_namespace_io(nd_region);
                break;
        case ND_DEVICE_NAMESPACE_PMEM:
-               devs = create_namespace_pmem(nd_region);
-               break;
        case ND_DEVICE_NAMESPACE_BLK:
-               devs = create_namespace_blk(nd_region);
+               devs = create_namespaces(nd_region);
                break;
        default:
                break;
@@ -2014,6 +2309,13 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
                        id = ida_simple_get(&nd_region->ns_ida, 0, 0,
                                        GFP_KERNEL);
                        nsblk->id = id;
+               } else if (type == ND_DEVICE_NAMESPACE_PMEM) {
+                       struct nd_namespace_pmem *nspm;
+
+                       nspm = to_nd_namespace_pmem(dev);
+                       id = ida_simple_get(&nd_region->ns_ida, 0, 0,
+                                       GFP_KERNEL);
+                       nspm->id = id;
                } else
                        id = i;
 
index 38ce6bb..8623e57 100644 (file)
@@ -44,6 +44,23 @@ struct nvdimm {
        struct resource *flush_wpq;
 };
 
+/**
+ * struct blk_alloc_info - tracking info for BLK dpa scanning
+ * @nd_mapping: blk region mapping boundaries
+ * @available: decremented in alias_dpa_busy as aliased PMEM is scanned
+ * @busy: decremented in blk_dpa_busy to account for ranges already
+ *       handled by alias_dpa_busy
+ * @res: alias_dpa_busy interprets this a free space range that needs to
+ *      be truncated to the valid BLK allocation starting DPA, blk_dpa_busy
+ *      treats it as a busy range that needs the aliased PMEM ranges
+ *      truncated.
+ */
+struct blk_alloc_info {
+       struct nd_mapping *nd_mapping;
+       resource_size_t available, busy;
+       struct resource *res;
+};
+
 bool is_nvdimm(struct device *dev);
 bool is_nd_pmem(struct device *dev);
 bool is_nd_blk(struct device *dev);
@@ -54,7 +71,7 @@ void nvdimm_devs_exit(void);
 void nd_region_devs_exit(void);
 void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
 struct nd_region;
-void nd_region_create_blk_seed(struct nd_region *nd_region);
+void nd_region_create_ns_seed(struct nd_region *nd_region);
 void nd_region_create_btt_seed(struct nd_region *nd_region);
 void nd_region_create_pfn_seed(struct nd_region *nd_region);
 void nd_region_create_dax_seed(struct nd_region *nd_region);
@@ -73,13 +90,14 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
 struct nd_region;
 struct nvdimm_drvdata;
 struct nd_mapping;
+void nd_mapping_free_labels(struct nd_mapping *nd_mapping);
 resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
                struct nd_mapping *nd_mapping, resource_size_t *overlap);
-resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping);
+resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
 resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
 resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
                struct nd_label_id *label_id);
-struct nd_mapping;
+int alias_dpa_busy(struct device *dev, void *data);
 struct resource *nsblk_add_resource(struct nd_region *nd_region,
                struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
                resource_size_t start);
index 0b78a82..d3b2fca 100644 (file)
@@ -101,9 +101,6 @@ static inline struct nd_namespace_index *to_next_namespace_index(
                (unsigned long long) (res ? resource_size(res) : 0), \
                (unsigned long long) (res ? res->start : 0), ##arg)
 
-#define for_each_label(l, label, labels) \
-       for (l = 0; (label = labels ? labels[l] : NULL); l++)
-
 #define for_each_dpa_resource(ndd, res) \
        for (res = (ndd)->dpa.child; res; res = res->sibling)
 
@@ -116,6 +113,31 @@ struct nd_percpu_lane {
        spinlock_t lock;
 };
 
+struct nd_label_ent {
+       struct list_head list;
+       struct nd_namespace_label *label;
+};
+
+enum nd_mapping_lock_class {
+       ND_MAPPING_CLASS0,
+       ND_MAPPING_UUID_SCAN,
+};
+
+struct nd_mapping {
+       struct nvdimm *nvdimm;
+       u64 start;
+       u64 size;
+       struct list_head labels;
+       struct mutex lock;
+       /*
+        * @ndd is for private use at region enable / disable time for
+        * get_ndd() + put_ndd(), all other nd_mapping to ndd
+        * conversions use to_ndd() which respects enabled state of the
+        * nvdimm.
+        */
+       struct nvdimm_drvdata *ndd;
+};
+
 struct nd_region {
        struct device dev;
        struct ida ns_ida;
@@ -209,6 +231,7 @@ void nvdimm_exit(void);
 void nd_region_exit(void);
 struct nvdimm;
 struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
+int nvdimm_check_config_data(struct device *dev);
 int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
index 571a6c7..42b3a82 100644 (file)
@@ -66,13 +66,32 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
        invalidate_pmem(pmem->virt_addr + offset, len);
 }
 
+static void write_pmem(void *pmem_addr, struct page *page,
+               unsigned int off, unsigned int len)
+{
+       void *mem = kmap_atomic(page);
+
+       memcpy_to_pmem(pmem_addr, mem + off, len);
+       kunmap_atomic(mem);
+}
+
+static int read_pmem(struct page *page, unsigned int off,
+               void *pmem_addr, unsigned int len)
+{
+       int rc;
+       void *mem = kmap_atomic(page);
+
+       rc = memcpy_from_pmem(mem + off, pmem_addr, len);
+       kunmap_atomic(mem);
+       return rc;
+}
+
 static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
                        unsigned int len, unsigned int off, bool is_write,
                        sector_t sector)
 {
        int rc = 0;
        bool bad_pmem = false;
-       void *mem = kmap_atomic(page);
        phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
        void *pmem_addr = pmem->virt_addr + pmem_off;
 
@@ -83,7 +102,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
                if (unlikely(bad_pmem))
                        rc = -EIO;
                else {
-                       rc = memcpy_from_pmem(mem + off, pmem_addr, len);
+                       rc = read_pmem(page, off, pmem_addr, len);
                        flush_dcache_page(page);
                }
        } else {
@@ -102,14 +121,13 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
                 * after clear poison.
                 */
                flush_dcache_page(page);
-               memcpy_to_pmem(pmem_addr, mem + off, len);
+               write_pmem(pmem_addr, page, off, len);
                if (unlikely(bad_pmem)) {
                        pmem_clear_poison(pmem, pmem_off, len);
-                       memcpy_to_pmem(pmem_addr, mem + off, len);
+                       write_pmem(pmem_addr, page, off, len);
                }
        }
 
-       kunmap_atomic(mem);
        return rc;
 }
 
index f9d58c2..6af5e62 100644 (file)
@@ -313,9 +313,8 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
                                blk_max_overlap = overlap;
                                goto retry;
                        }
-               } else if (is_nd_blk(&nd_region->dev)) {
-                       available += nd_blk_available_dpa(nd_mapping);
-               }
+               } else if (is_nd_blk(&nd_region->dev))
+                       available += nd_blk_available_dpa(nd_region);
        }
 
        return available;
@@ -506,6 +505,17 @@ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
        return 0;
 }
 
+void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
+{
+       struct nd_label_ent *label_ent, *e;
+
+       WARN_ON(!mutex_is_locked(&nd_mapping->lock));
+       list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+               list_del(&label_ent->list);
+               kfree(label_ent);
+       }
+}
+
 /*
  * Upon successful probe/remove, take/release a reference on the
  * associated interleave set (if present), and plant new btt + namespace
@@ -526,8 +536,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
                        struct nvdimm_drvdata *ndd = nd_mapping->ndd;
                        struct nvdimm *nvdimm = nd_mapping->nvdimm;
 
-                       kfree(nd_mapping->labels);
-                       nd_mapping->labels = NULL;
+                       mutex_lock(&nd_mapping->lock);
+                       nd_mapping_free_labels(nd_mapping);
+                       mutex_unlock(&nd_mapping->lock);
+
                        put_ndd(ndd);
                        nd_mapping->ndd = NULL;
                        if (ndd)
@@ -537,11 +549,12 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
                if (is_nd_pmem(dev))
                        return;
        }
-       if (dev->parent && is_nd_blk(dev->parent) && probe) {
+       if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent))
+                       && probe) {
                nd_region = to_nd_region(dev->parent);
                nvdimm_bus_lock(dev);
                if (nd_region->ns_seed == dev)
-                       nd_region_create_blk_seed(nd_region);
+                       nd_region_create_ns_seed(nd_region);
                nvdimm_bus_unlock(dev);
        }
        if (is_nd_btt(dev) && probe) {
@@ -551,23 +564,30 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
                nvdimm_bus_lock(dev);
                if (nd_region->btt_seed == dev)
                        nd_region_create_btt_seed(nd_region);
-               if (nd_region->ns_seed == &nd_btt->ndns->dev &&
-                               is_nd_blk(dev->parent))
-                       nd_region_create_blk_seed(nd_region);
+               if (nd_region->ns_seed == &nd_btt->ndns->dev)
+                       nd_region_create_ns_seed(nd_region);
                nvdimm_bus_unlock(dev);
        }
        if (is_nd_pfn(dev) && probe) {
+               struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
                nd_region = to_nd_region(dev->parent);
                nvdimm_bus_lock(dev);
                if (nd_region->pfn_seed == dev)
                        nd_region_create_pfn_seed(nd_region);
+               if (nd_region->ns_seed == &nd_pfn->ndns->dev)
+                       nd_region_create_ns_seed(nd_region);
                nvdimm_bus_unlock(dev);
        }
        if (is_nd_dax(dev) && probe) {
+               struct nd_dax *nd_dax = to_nd_dax(dev);
+
                nd_region = to_nd_region(dev->parent);
                nvdimm_bus_lock(dev);
                if (nd_region->dax_seed == dev)
                        nd_region_create_dax_seed(nd_region);
+               if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
+                       nd_region_create_ns_seed(nd_region);
                nvdimm_bus_unlock(dev);
        }
 }
@@ -774,10 +794,10 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
        int ro = 0;
 
        for (i = 0; i < ndr_desc->num_mappings; i++) {
-               struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
-               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+               struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
+               struct nvdimm *nvdimm = mapping->nvdimm;
 
-               if ((nd_mapping->start | nd_mapping->size) % SZ_4K) {
+               if ((mapping->start | mapping->size) % SZ_4K) {
                        dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
                                        caller, dev_name(&nvdimm->dev), i);
 
@@ -828,11 +848,15 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
                ndl->count = 0;
        }
 
-       memcpy(nd_region->mapping, ndr_desc->nd_mapping,
-                       sizeof(struct nd_mapping) * ndr_desc->num_mappings);
        for (i = 0; i < ndr_desc->num_mappings; i++) {
-               struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
-               struct nvdimm *nvdimm = nd_mapping->nvdimm;
+               struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
+               struct nvdimm *nvdimm = mapping->nvdimm;
+
+               nd_region->mapping[i].nvdimm = nvdimm;
+               nd_region->mapping[i].start = mapping->start;
+               nd_region->mapping[i].size = mapping->size;
+               INIT_LIST_HEAD(&nd_region->mapping[i].labels);
+               mutex_init(&nd_region->mapping[i].lock);
 
                get_device(&nvdimm->dev);
        }
index b519e13..f4947fd 100644 (file)
@@ -50,23 +50,6 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
                struct nvdimm *nvdimm, unsigned int cmd, void *buf,
                unsigned int buf_len, int *cmd_rc);
 
-struct nd_namespace_label;
-struct nvdimm_drvdata;
-
-struct nd_mapping {
-       struct nvdimm *nvdimm;
-       struct nd_namespace_label **labels;
-       u64 start;
-       u64 size;
-       /*
-        * @ndd is for private use at region enable / disable time for
-        * get_ndd() + put_ndd(), all other nd_mapping to ndd
-        * conversions use to_ndd() which respects enabled state of the
-        * nvdimm.
-        */
-       struct nvdimm_drvdata *ndd;
-};
-
 struct nvdimm_bus_descriptor {
        const struct attribute_group **attr_groups;
        unsigned long cmd_mask;
@@ -89,9 +72,15 @@ struct nd_interleave_set {
        u64 cookie;
 };
 
+struct nd_mapping_desc {
+       struct nvdimm *nvdimm;
+       u64 start;
+       u64 size;
+};
+
 struct nd_region_desc {
        struct resource *res;
-       struct nd_mapping *nd_mapping;
+       struct nd_mapping_desc *mapping;
        u16 num_mappings;
        const struct attribute_group **attr_groups;
        struct nd_interleave_set *nd_set;
@@ -129,6 +118,8 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
 }
 
 int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
+void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+               phys_addr_t start, unsigned int len);
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
                struct nvdimm_bus_descriptor *nfit_desc);
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
@@ -139,6 +130,7 @@ struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
+struct kobject *nvdimm_kobj(struct nvdimm *nvdimm);
 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
index f1ea426..fa66aee 100644 (file)
@@ -77,11 +77,13 @@ struct nd_namespace_io {
  * @nsio: device and system physical address range to drive
  * @alt_name: namespace name supplied in the dimm label
  * @uuid: namespace name supplied in the dimm label
+ * @id: ida allocated id
  */
 struct nd_namespace_pmem {
        struct nd_namespace_io nsio;
        char *alt_name;
        u8 *uuid;
+       int id;
 };
 
 /**
@@ -105,19 +107,19 @@ struct nd_namespace_blk {
        struct resource **res;
 };
 
-static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
+static inline struct nd_namespace_io *to_nd_namespace_io(const struct device *dev)
 {
        return container_of(dev, struct nd_namespace_io, common.dev);
 }
 
-static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
+static inline struct nd_namespace_pmem *to_nd_namespace_pmem(const struct device *dev)
 {
        struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
 
        return container_of(nsio, struct nd_namespace_pmem, nsio);
 }
 
-static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev)
+static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device *dev)
 {
        return container_of(dev, struct nd_namespace_blk, common.dev);
 }
index ba5a8c7..ede5c6a 100644 (file)
@@ -21,14 +21,16 @@ struct nd_cmd_smart {
 } __packed;
 
 #define ND_SMART_HEALTH_VALID  (1 << 0)
-#define ND_SMART_TEMP_VALID    (1 << 1)
-#define ND_SMART_SPARES_VALID  (1 << 2)
-#define ND_SMART_ALARM_VALID   (1 << 3)
-#define ND_SMART_USED_VALID    (1 << 4)
-#define ND_SMART_SHUTDOWN_VALID        (1 << 5)
-#define ND_SMART_VENDOR_VALID  (1 << 6)
-#define ND_SMART_TEMP_TRIP     (1 << 0)
-#define ND_SMART_SPARE_TRIP    (1 << 1)
+#define ND_SMART_SPARES_VALID  (1 << 1)
+#define ND_SMART_USED_VALID    (1 << 2)
+#define ND_SMART_TEMP_VALID    (1 << 3)
+#define ND_SMART_CTEMP_VALID   (1 << 4)
+#define ND_SMART_ALARM_VALID   (1 << 9)
+#define ND_SMART_SHUTDOWN_VALID        (1 << 10)
+#define ND_SMART_VENDOR_VALID  (1 << 11)
+#define ND_SMART_SPARE_TRIP    (1 << 0)
+#define ND_SMART_TEMP_TRIP     (1 << 1)
+#define ND_SMART_CTEMP_TRIP    (1 << 2)
 #define ND_SMART_NON_CRITICAL_HEALTH   (1 << 0)
 #define ND_SMART_CRITICAL_HEALTH       (1 << 1)
 #define ND_SMART_FATAL_HEALTH          (1 << 2)
@@ -37,14 +39,15 @@ struct nd_smart_payload {
        __u32 flags;
        __u8 reserved0[4];
        __u8 health;
-       __u16 temperature;
        __u8 spares;
-       __u8 alarm_flags;
        __u8 life_used;
+       __u8 alarm_flags;
+       __u16 temperature;
+       __u16 ctrl_temperature;
+       __u8 reserved1[15];
        __u8 shutdown_state;
-       __u8 reserved1;
        __u32 vendor_size;
-       __u8 vendor_data[108];
+       __u8 vendor_data[92];
 } __packed;
 
 struct nd_cmd_smart_threshold {
@@ -53,7 +56,8 @@ struct nd_cmd_smart_threshold {
 } __packed;
 
 struct nd_smart_threshold_payload {
-       __u16 alarm_control;
+       __u8 alarm_control;
+       __u8 reserved0;
        __u16 temperature;
        __u8 spares;
        __u8 reserved[3];
index ad6dd05..582db95 100644 (file)
@@ -13,6 +13,7 @@ ldflags-y += --wrap=__release_region
 ldflags-y += --wrap=devm_memremap_pages
 ldflags-y += --wrap=insert_resource
 ldflags-y += --wrap=remove_resource
+ldflags-y += --wrap=acpi_evaluate_object
 
 DRIVERS := ../../../drivers
 NVDIMM_SRC := $(DRIVERS)/nvdimm
index c29f8dc..3ccef73 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/pfn_t.h>
+#include <linux/acpi.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 #include "nfit_test.h"
@@ -73,7 +74,7 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
 
        if (nfit_res)
                return (void __iomem *) nfit_res->buf + offset
-                       - nfit_res->res->start;
+                       - nfit_res->res.start;
        return fallback_fn(offset, size);
 }
 
@@ -84,7 +85,7 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
 
        if (nfit_res)
                return (void __iomem *) nfit_res->buf + offset
-                       - nfit_res->res->start;
+                       - nfit_res->res.start;
        return devm_ioremap_nocache(dev, offset, size);
 }
 EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
@@ -95,7 +96,7 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
        struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
        if (nfit_res)
-               return nfit_res->buf + offset - nfit_res->res->start;
+               return nfit_res->buf + offset - nfit_res->res.start;
        return devm_memremap(dev, offset, size, flags);
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
@@ -107,7 +108,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
        struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
        if (nfit_res)
-               return nfit_res->buf + offset - nfit_res->res->start;
+               return nfit_res->buf + offset - nfit_res->res.start;
        return devm_memremap_pages(dev, res, ref, altmap);
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);
@@ -128,7 +129,7 @@ void *__wrap_memremap(resource_size_t offset, size_t size,
        struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
        if (nfit_res)
-               return nfit_res->buf + offset - nfit_res->res->start;
+               return nfit_res->buf + offset - nfit_res->res.start;
        return memremap(offset, size, flags);
 }
 EXPORT_SYMBOL(__wrap_memremap);
@@ -174,6 +175,63 @@ void __wrap_memunmap(void *addr)
 }
 EXPORT_SYMBOL(__wrap_memunmap);
 
+static bool nfit_test_release_region(struct device *dev,
+               struct resource *parent, resource_size_t start,
+               resource_size_t n);
+
+static void nfit_devres_release(struct device *dev, void *data)
+{
+       struct resource *res = *((struct resource **) data);
+
+       WARN_ON(!nfit_test_release_region(NULL, &iomem_resource, res->start,
+                       resource_size(res)));
+}
+
+static int match(struct device *dev, void *__res, void *match_data)
+{
+       struct resource *res = *((struct resource **) __res);
+       resource_size_t start = *((resource_size_t *) match_data);
+
+       return res->start == start;
+}
+
+static bool nfit_test_release_region(struct device *dev,
+               struct resource *parent, resource_size_t start,
+               resource_size_t n)
+{
+       if (parent == &iomem_resource) {
+               struct nfit_test_resource *nfit_res = get_nfit_res(start);
+
+               if (nfit_res) {
+                       struct nfit_test_request *req;
+                       struct resource *res = NULL;
+
+                       if (dev) {
+                               devres_release(dev, nfit_devres_release, match,
+                                               &start);
+                               return true;
+                       }
+
+                       spin_lock(&nfit_res->lock);
+                       list_for_each_entry(req, &nfit_res->requests, list)
+                               if (req->res.start == start) {
+                                       res = &req->res;
+                                       list_del(&req->list);
+                                       break;
+                               }
+                       spin_unlock(&nfit_res->lock);
+
+                       WARN(!res || resource_size(res) != n,
+                                       "%s: start: %llx n: %llx mismatch: %pr\n",
+                                               __func__, start, n, res);
+                       if (res)
+                               kfree(req);
+                       return true;
+               }
+       }
+       return false;
+}
+
 static struct resource *nfit_test_request_region(struct device *dev,
                struct resource *parent, resource_size_t start,
                resource_size_t n, const char *name, int flags)
@@ -183,21 +241,57 @@ static struct resource *nfit_test_request_region(struct device *dev,
        if (parent == &iomem_resource) {
                nfit_res = get_nfit_res(start);
                if (nfit_res) {
-                       struct resource *res = nfit_res->res + 1;
+                       struct nfit_test_request *req;
+                       struct resource *res = NULL;
 
-                       if (start + n > nfit_res->res->start
-                                       + resource_size(nfit_res->res)) {
+                       if (start + n > nfit_res->res.start
+                                       + resource_size(&nfit_res->res)) {
                                pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
                                                __func__, start, n,
-                                               nfit_res->res);
+                                               &nfit_res->res);
                                return NULL;
                        }
 
+                       spin_lock(&nfit_res->lock);
+                       list_for_each_entry(req, &nfit_res->requests, list)
+                               if (start == req->res.start) {
+                                       res = &req->res;
+                                       break;
+                               }
+                       spin_unlock(&nfit_res->lock);
+
+                       if (res) {
+                               WARN(1, "%pr already busy\n", res);
+                               return NULL;
+                       }
+
+                       req = kzalloc(sizeof(*req), GFP_KERNEL);
+                       if (!req)
+                               return NULL;
+                       INIT_LIST_HEAD(&req->list);
+                       res = &req->res;
+
                        res->start = start;
                        res->end = start + n - 1;
                        res->name = name;
                        res->flags = resource_type(parent);
                        res->flags |= IORESOURCE_BUSY | flags;
+                       spin_lock(&nfit_res->lock);
+                       list_add(&req->list, &nfit_res->requests);
+                       spin_unlock(&nfit_res->lock);
+
+                       if (dev) {
+                               struct resource **d;
+
+                               d = devres_alloc(nfit_devres_release,
+                                               sizeof(struct resource *),
+                                               GFP_KERNEL);
+                               if (!d)
+                                       return NULL;
+                               *d = res;
+                               devres_add(dev, d);
+                       }
+
                        pr_debug("%s: %pr\n", __func__, res);
                        return res;
                }
@@ -241,29 +335,10 @@ struct resource *__wrap___devm_request_region(struct device *dev,
 }
 EXPORT_SYMBOL(__wrap___devm_request_region);
 
-static bool nfit_test_release_region(struct resource *parent,
-               resource_size_t start, resource_size_t n)
-{
-       if (parent == &iomem_resource) {
-               struct nfit_test_resource *nfit_res = get_nfit_res(start);
-               if (nfit_res) {
-                       struct resource *res = nfit_res->res + 1;
-
-                       if (start != res->start || resource_size(res) != n)
-                               pr_info("%s: start: %llx n: %llx mismatch: %pr\n",
-                                               __func__, start, n, res);
-                       else
-                               memset(res, 0, sizeof(*res));
-                       return true;
-               }
-       }
-       return false;
-}
-
 void __wrap___release_region(struct resource *parent, resource_size_t start,
                resource_size_t n)
 {
-       if (!nfit_test_release_region(parent, start, n))
+       if (!nfit_test_release_region(NULL, parent, start, n))
                __release_region(parent, start, n);
 }
 EXPORT_SYMBOL(__wrap___release_region);
@@ -271,9 +346,25 @@ EXPORT_SYMBOL(__wrap___release_region);
 void __wrap___devm_release_region(struct device *dev, struct resource *parent,
                resource_size_t start, resource_size_t n)
 {
-       if (!nfit_test_release_region(parent, start, n))
+       if (!nfit_test_release_region(dev, parent, start, n))
                __devm_release_region(dev, parent, start, n);
 }
 EXPORT_SYMBOL(__wrap___devm_release_region);
 
+acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
+               struct acpi_object_list *p, struct acpi_buffer *buf)
+{
+       struct nfit_test_resource *nfit_res = get_nfit_res((long) handle);
+       union acpi_object **obj;
+
+       if (!nfit_res || strcmp(path, "_FIT") || !buf)
+               return acpi_evaluate_object(handle, path, p, buf);
+
+       obj = nfit_res->buf;
+       buf->length = sizeof(union acpi_object);
+       buf->pointer = *obj;
+       return AE_OK;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
+
 MODULE_LICENSE("GPL v2");
index f64c57b..c9a6458 100644 (file)
@@ -132,6 +132,8 @@ static u32 handle[NUM_DCR] = {
        [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
 };
 
+static unsigned long dimm_fail_cmd_flags[NUM_DCR];
+
 struct nfit_test {
        struct acpi_nfit_desc acpi_desc;
        struct platform_device pdev;
@@ -154,11 +156,14 @@ struct nfit_test {
        int (*alloc)(struct nfit_test *t);
        void (*setup)(struct nfit_test *t);
        int setup_hotplug;
+       union acpi_object **_fit;
+       dma_addr_t _fit_dma;
        struct ars_state {
                struct nd_cmd_ars_status *ars_status;
                unsigned long deadline;
                spinlock_t lock;
        } ars_state;
+       struct device *dimm_dev[NUM_DCR];
 };
 
 static struct nfit_test *to_nfit_test(struct device *dev)
@@ -411,6 +416,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                if (i >= ARRAY_SIZE(handle))
                        return -ENXIO;
 
+               if ((1 << func) & dimm_fail_cmd_flags[i])
+                       return -EIO;
+
                switch (func) {
                case ND_CMD_GET_CONFIG_SIZE:
                        rc = nfit_test_cmd_get_config_size(buf, buf_len);
@@ -428,6 +436,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                        break;
                case ND_CMD_SMART_THRESHOLD:
                        rc = nfit_test_cmd_smart_threshold(buf, buf_len);
+                       device_lock(&t->pdev.dev);
+                       __acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
+                       device_unlock(&t->pdev.dev);
                        break;
                default:
                        return -ENOTTY;
@@ -467,14 +478,12 @@ static struct nfit_test *instances[NUM_NFITS];
 static void release_nfit_res(void *data)
 {
        struct nfit_test_resource *nfit_res = data;
-       struct resource *res = nfit_res->res;
 
        spin_lock(&nfit_test_lock);
        list_del(&nfit_res->list);
        spin_unlock(&nfit_test_lock);
 
        vfree(nfit_res->buf);
-       kfree(res);
        kfree(nfit_res);
 }
 
@@ -482,12 +491,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
                void *buf)
 {
        struct device *dev = &t->pdev.dev;
-       struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL);
        struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
                        GFP_KERNEL);
        int rc;
 
-       if (!res || !buf || !nfit_res)
+       if (!buf || !nfit_res)
                goto err;
        rc = devm_add_action(dev, release_nfit_res, nfit_res);
        if (rc)
@@ -496,10 +504,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
        memset(buf, 0, size);
        nfit_res->dev = dev;
        nfit_res->buf = buf;
-       nfit_res->res = res;
-       res->start = *dma;
-       res->end = *dma + size - 1;
-       res->name = "NFIT";
+       nfit_res->res.start = *dma;
+       nfit_res->res.end = *dma + size - 1;
+       nfit_res->res.name = "NFIT";
+       spin_lock_init(&nfit_res->lock);
+       INIT_LIST_HEAD(&nfit_res->requests);
        spin_lock(&nfit_test_lock);
        list_add(&nfit_res->list, &t->resources);
        spin_unlock(&nfit_test_lock);
@@ -508,7 +517,6 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
  err:
        if (buf)
                vfree(buf);
-       kfree(res);
        kfree(nfit_res);
        return NULL;
 }
@@ -533,13 +541,13 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
                        continue;
                spin_lock(&nfit_test_lock);
                list_for_each_entry(n, &t->resources, list) {
-                       if (addr >= n->res->start && (addr < n->res->start
-                                               + resource_size(n->res))) {
+                       if (addr >= n->res.start && (addr < n->res.start
+                                               + resource_size(&n->res))) {
                                nfit_res = n;
                                break;
                        } else if (addr >= (unsigned long) n->buf
                                        && (addr < (unsigned long) n->buf
-                                               + resource_size(n->res))) {
+                                               + resource_size(&n->res))) {
                                nfit_res = n;
                                break;
                        }
@@ -564,6 +572,86 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state)
        return 0;
 }
 
+static void put_dimms(void *data)
+{
+       struct device **dimm_dev = data;
+       int i;
+
+       for (i = 0; i < NUM_DCR; i++)
+               if (dimm_dev[i])
+                       device_unregister(dimm_dev[i]);
+}
+
+static struct class *nfit_test_dimm;
+
+static int dimm_name_to_id(struct device *dev)
+{
+       int dimm;
+
+       if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1
+                       || dimm >= NUM_DCR || dimm < 0)
+               return -ENXIO;
+       return dimm;
+}
+
+
+static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
+               char *buf)
+{
+       int dimm = dimm_name_to_id(dev);
+
+       if (dimm < 0)
+               return dimm;
+
+       return sprintf(buf, "%#x", handle[dimm]);
+}
+DEVICE_ATTR_RO(handle);
+
+static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr,
+               char *buf)
+{
+       int dimm = dimm_name_to_id(dev);
+
+       if (dimm < 0)
+               return dimm;
+
+       return sprintf(buf, "%#lx\n", dimm_fail_cmd_flags[dimm]);
+}
+
+static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
+               const char *buf, size_t size)
+{
+       int dimm = dimm_name_to_id(dev);
+       unsigned long val;
+       ssize_t rc;
+
+       if (dimm < 0)
+               return dimm;
+
+       rc = kstrtol(buf, 0, &val);
+       if (rc)
+               return rc;
+
+       dimm_fail_cmd_flags[dimm] = val;
+       return size;
+}
+static DEVICE_ATTR_RW(fail_cmd);
+
+static struct attribute *nfit_test_dimm_attributes[] = {
+       &dev_attr_fail_cmd.attr,
+       &dev_attr_handle.attr,
+       NULL,
+};
+
+static struct attribute_group nfit_test_dimm_attribute_group = {
+       .attrs = nfit_test_dimm_attributes,
+};
+
+static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
+       &nfit_test_dimm_attribute_group,
+       NULL,
+};
+
 static int nfit_test0_alloc(struct nfit_test *t)
 {
        size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -616,6 +704,21 @@ static int nfit_test0_alloc(struct nfit_test *t)
                        return -ENOMEM;
        }
 
+       t->_fit = test_alloc(t, sizeof(union acpi_object **), &t->_fit_dma);
+       if (!t->_fit)
+               return -ENOMEM;
+
+       if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev))
+               return -ENOMEM;
+       for (i = 0; i < NUM_DCR; i++) {
+               t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+                               &t->pdev.dev, 0, NULL,
+                               nfit_test_dimm_attribute_groups,
+                               "test_dimm%d", i);
+               if (!t->dimm_dev[i])
+                       return -ENOMEM;
+       }
+
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
@@ -1409,6 +1512,8 @@ static int nfit_test_probe(struct platform_device *pdev)
        struct acpi_nfit_desc *acpi_desc;
        struct device *dev = &pdev->dev;
        struct nfit_test *nfit_test;
+       struct nfit_mem *nfit_mem;
+       union acpi_object *obj;
        int rc;
 
        nfit_test = to_nfit_test(&pdev->dev);
@@ -1476,14 +1581,30 @@ static int nfit_test_probe(struct platform_device *pdev)
        if (nfit_test->setup != nfit_test0_setup)
                return 0;
 
-       flush_work(&acpi_desc->work);
        nfit_test->setup_hotplug = 1;
        nfit_test->setup(nfit_test);
 
-       rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
-                       nfit_test->nfit_size);
-       if (rc)
-               return rc;
+       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+       if (!obj)
+               return -ENOMEM;
+       obj->type = ACPI_TYPE_BUFFER;
+       obj->buffer.length = nfit_test->nfit_size;
+       obj->buffer.pointer = nfit_test->nfit_buf;
+       *(nfit_test->_fit) = obj;
+       __acpi_nfit_notify(&pdev->dev, nfit_test, 0x80);
+
+       /* associate dimm devices with nfit_mem data for notification testing */
+       mutex_lock(&acpi_desc->init_mutex);
+       list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+               u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+               int i;
+
+               for (i = 0; i < NUM_DCR; i++)
+                       if (nfit_handle == handle[i])
+                               dev_set_drvdata(nfit_test->dimm_dev[i],
+                                               nfit_mem);
+       }
+       mutex_unlock(&acpi_desc->init_mutex);
 
        return 0;
 }
@@ -1518,6 +1639,10 @@ static __init int nfit_test_init(void)
 {
        int rc, i;
 
+       nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
+       if (IS_ERR(nfit_test_dimm))
+               return PTR_ERR(nfit_test_dimm);
+
        nfit_test_setup(nfit_test_lookup);
 
        for (i = 0; i < NUM_NFITS; i++) {
@@ -1584,6 +1709,7 @@ static __exit void nfit_test_exit(void)
        for (i = 0; i < NUM_NFITS; i++)
                platform_device_unregister(&instances[i]->pdev);
        nfit_test_teardown();
+       class_destroy(nfit_test_dimm);
 }
 
 module_init(nfit_test_init);
index 9f18e2a..c281dd2 100644 (file)
 #ifndef __NFIT_TEST_H__
 #define __NFIT_TEST_H__
 #include <linux/list.h>
+#include <linux/ioport.h>
+#include <linux/spinlock_types.h>
+
+struct nfit_test_request {
+       struct list_head list;
+       struct resource res;
+};
 
 struct nfit_test_resource {
+       struct list_head requests;
        struct list_head list;
-       struct resource *res;
+       struct resource res;
        struct device *dev;
+       spinlock_t lock;
+       int req_count;
        void *buf;
 };