Merge tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
author: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 8 Sep 2015 21:35:59 +0000 (14:35 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 8 Sep 2015 21:35:59 +0000 (14:35 -0700)
Pull libnvdimm updates from Dan Williams:
 "This update has successfully completed a 0day-kbuild run and has
  appeared in a linux-next release.  The changes outside of the typical
  drivers/nvdimm/ and drivers/acpi/nfit.[ch] paths are related to the
  removal of IORESOURCE_CACHEABLE, the introduction of memremap(), and
  the introduction of ZONE_DEVICE + devm_memremap_pages().

  Summary:

   - Introduce ZONE_DEVICE and devm_memremap_pages() as a generic
     mechanism for adding device-driver-discovered memory regions to the
     kernel's direct map.

     This facility is used by the pmem driver to enable pfn_to_page()
     operations on the page frames returned by DAX ('direct_access' in
     'struct block_device_operations').

     For now, the 'memmap' allocation for these "device" pages comes
     from "System RAM".  Support for allocating the memmap from device
     memory will arrive in a later kernel.

   - Introduce memremap() to replace usages of ioremap_cache() and
     ioremap_wt().  memremap() drops the __iomem annotation for these
     mappings to memory that do not have i/o side effects.  The
     replacement of ioremap_cache() with memremap() is limited to the
     pmem driver to ease merging the api change in v4.3.

     Completion of the conversion is targeted for v4.4.

   - Similar to the usage of memcpy_to_pmem() + wmb_pmem() in the pmem
     driver, update the VFS DAX implementation and PMEM api to provide
     persistence guarantees for kernel operations on a DAX mapping.

   - Convert the ACPI NFIT 'BLK' driver to map the block apertures as
     cacheable to improve performance.

   - Miscellaneous updates and fixes to libnvdimm including support for
     issuing "address range scrub" commands, clarifying the optimal
     'sector size' of pmem devices, a clarification of the usage of the
     ACPI '_STA' (status) property for DIMM devices, and other minor
     fixes"

* tag 'libnvdimm-for-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (34 commits)
  libnvdimm, pmem: direct map legacy pmem by default
  libnvdimm, pmem: 'struct page' for pmem
  libnvdimm, pfn: 'struct page' provider infrastructure
  x86, pmem: clarify that ARCH_HAS_PMEM_API implies PMEM mapped WB
  add devm_memremap_pages
  mm: ZONE_DEVICE for "device memory"
  mm: move __phys_to_pfn and __pfn_to_phys to asm/generic/memory_model.h
  dax: drop size parameter to ->direct_access()
  nd_blk: change aperture mapping from WC to WB
  nvdimm: change to use generic kvfree()
  pmem, dax: have direct_access use __pmem annotation
  dax: update I/O path to do proper PMEM flushing
  pmem: add copy_from_iter_pmem() and clear_pmem()
  pmem, x86: clean up conditional pmem includes
  pmem: remove layer when calling arch_has_wmb_pmem()
  pmem, x86: move x86 PMEM API to new pmem.h header
  libnvdimm, e820: make CONFIG_X86_PMEM_LEGACY a tristate option
  pmem: switch to devm_ allocations
  devres: add devm_memremap
  libnvdimm, btt: write and validate parent_uuid
  ...

92 files changed:
Documentation/filesystems/Locking
MAINTAINERS
arch/arm/include/asm/memory.h
arch/arm/mach-clps711x/board-cdb89712.c
arch/arm/mach-shmobile/pm-rcar.c
arch/arm64/include/asm/memory.h
arch/ia64/include/asm/io.h
arch/ia64/kernel/cyclone.c
arch/ia64/mm/init.c
arch/powerpc/kernel/pci_of_scan.c
arch/powerpc/mm/mem.c
arch/powerpc/sysdev/axonram.c
arch/s390/mm/init.c
arch/sh/include/asm/io.h
arch/sh/mm/init.c
arch/sparc/kernel/pci.c
arch/tile/mm/init.c
arch/unicore32/include/asm/memory.h
arch/x86/Kconfig
arch/x86/include/asm/cacheflush.h
arch/x86/include/asm/io.h
arch/x86/include/asm/pmem.h [new file with mode: 0644]
arch/x86/include/uapi/asm/e820.h
arch/x86/kernel/Makefile
arch/x86/kernel/pmem.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/xtensa/include/asm/io.h
drivers/acpi/Kconfig
drivers/acpi/nfit.c
drivers/acpi/nfit.h
drivers/block/brd.c
drivers/isdn/icn/icn.h
drivers/mtd/devices/slram.c
drivers/mtd/nand/diskonchip.c
drivers/mtd/onenand/generic.c
drivers/nvdimm/Kconfig
drivers/nvdimm/Makefile
drivers/nvdimm/btt.c
drivers/nvdimm/btt.h
drivers/nvdimm/btt_devs.c
drivers/nvdimm/claim.c [new file with mode: 0644]
drivers/nvdimm/dimm_devs.c
drivers/nvdimm/e820.c [new file with mode: 0644]
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/nd-core.h
drivers/nvdimm/nd.h
drivers/nvdimm/pfn.h [new file with mode: 0644]
drivers/nvdimm/pfn_devs.c [new file with mode: 0644]
drivers/nvdimm/pmem.c
drivers/nvdimm/region.c
drivers/nvdimm/region_devs.c
drivers/pci/probe.c
drivers/pnp/manager.c
drivers/s390/block/dcssblk.c
drivers/scsi/aic94xx/aic94xx_init.c
drivers/scsi/arcmsr/arcmsr_hba.c
drivers/scsi/mvsas/mv_init.c
drivers/scsi/sun3x_esp.c
drivers/staging/comedi/drivers/ii_pci20kc.c
drivers/staging/unisys/visorbus/visorchannel.c
drivers/staging/unisys/visorbus/visorchipset.c
drivers/tty/serial/8250/8250_core.c
drivers/video/fbdev/ocfb.c
drivers/video/fbdev/s1d13xxxfb.c
drivers/video/fbdev/stifb.c
fs/block_dev.c
fs/dax.c
include/asm-generic/memory_model.h
include/linux/blkdev.h
include/linux/io-mapping.h
include/linux/io.h
include/linux/libnvdimm.h
include/linux/memory_hotplug.h
include/linux/mm.h
include/linux/mmzone.h
include/linux/mtd/map.h
include/linux/pmem.h
include/uapi/linux/ndctl.h
include/video/vga.h
kernel/Makefile
kernel/memremap.c [new file with mode: 0644]
kernel/resource.c
lib/Kconfig
lib/devres.c
lib/pci_iomap.c
mm/Kconfig
mm/memory_hotplug.c
mm/page_alloc.c
tools/testing/nvdimm/Kbuild
tools/testing/nvdimm/test/iomap.c
tools/testing/nvdimm/test/nfit.c

index 6a34a0f..06d4434 100644 (file)
@@ -397,7 +397,8 @@ prototypes:
        int (*release) (struct gendisk *, fmode_t);
        int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-       int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
+       int (*direct_access) (struct block_device *, sector_t, void __pmem **,
+                               unsigned long *);
        int (*media_changed) (struct gendisk *);
        void (*unlock_native_capacity) (struct gendisk *);
        int (*revalidate_disk) (struct gendisk *);
index 6dfc224..8277838 100644 (file)
@@ -6229,6 +6229,7 @@ Q:        https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:     Supported
 F:     drivers/nvdimm/pmem.c
 F:     include/linux/pmem.h
+F:     arch/*/include/asm/pmem.h
 
 LINUX FOR IBM pSERIES (RS/6000)
 M:     Paul Mackerras <paulus@au.ibm.com>
index b7f6fb4..98d58bb 100644 (file)
 #define DTCM_OFFSET    UL(0xfffe8000)
 #endif
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define        __phys_to_pfn(paddr)    ((unsigned long)((paddr) >> PAGE_SHIFT))
-#define        __pfn_to_phys(pfn)      ((phys_addr_t)(pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */
index 1ec378c..972abdb 100644 (file)
@@ -95,7 +95,7 @@ static struct physmap_flash_data cdb89712_bootrom_pdata __initdata = {
 
 static struct resource cdb89712_bootrom_resources[] __initdata = {
        DEFINE_RES_NAMED(CS7_PHYS_BASE, SZ_128, "BOOTROM", IORESOURCE_MEM |
-                        IORESOURCE_CACHEABLE | IORESOURCE_READONLY),
+                        IORESOURCE_READONLY),
 };
 
 static struct platform_device cdb89712_bootrom_pdev __initdata = {
index 4092ad1..0af05d2 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/err.h>
 #include <linux/mm.h>
 #include <linux/spinlock.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include "pm-rcar.h"
 
 /* SYSC Common */
index 44a59c2..6b4c3ad 100644 (file)
 #define __virt_to_phys(x)      (((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
 #define __phys_to_virt(x)      ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define        __phys_to_pfn(paddr)    ((unsigned long)((paddr) >> PAGE_SHIFT))
-#define        __pfn_to_phys(pfn)      ((phys_addr_t)(pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */
index 80a7e34..9041bbe 100644 (file)
@@ -435,6 +435,7 @@ static inline void __iomem * ioremap_cache (unsigned long phys_addr, unsigned lo
 {
        return ioremap(phys_addr, size);
 }
+#define ioremap_cache ioremap_cache
 
 
 /*
index 4826ff9..5fa3848 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/errno.h>
 #include <linux/timex.h>
 #include <linux/clocksource.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 /* IBM Summit (EXA) Cyclone counter code*/
 #define CYCLONE_CBAR_ADDR 0xFEB00CD0
index 97e48b0..1841ef6 100644 (file)
@@ -645,7 +645,7 @@ mem_init (void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
        pg_data_t *pgdat;
        struct zone *zone;
@@ -656,7 +656,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
        pgdat = NODE_DATA(nid);
 
        zone = pgdat->node_zones +
-               zone_for_memory(nid, start, size, ZONE_NORMAL);
+               zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
        ret = __add_pages(nid, zone, start_pfn, nr_pages);
 
        if (ret)
index c8c62c7..2e710c1 100644 (file)
@@ -102,7 +102,7 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
                        res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
                } else if (i == dev->rom_base_reg) {
                        res = &dev->resource[PCI_ROM_RESOURCE];
-                       flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+                       flags |= IORESOURCE_READONLY;
                } else {
                        printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
                        continue;
index e1fe333..22d94c3 100644 (file)
@@ -113,7 +113,7 @@ int memory_add_physaddr_to_nid(u64 start)
 }
 #endif
 
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
        struct pglist_data *pgdata;
        struct zone *zone;
@@ -128,7 +128,7 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
        /* this should work for most non-highmem platforms */
        zone = pgdata->node_zones +
-               zone_for_memory(nid, start, size, 0);
+               zone_for_memory(nid, start, size, 0, for_device);
 
        return __add_pages(nid, zone, start_pfn, nr_pages);
 }
index f86250c..d2b79bc 100644 (file)
@@ -141,13 +141,14 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-                      void **kaddr, unsigned long *pfn, long size)
+                      void __pmem **kaddr, unsigned long *pfn)
 {
        struct axon_ram_bank *bank = device->bd_disk->private_data;
        loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
+       void *addr = (void *)(bank->ph_addr + offset);
 
-       *kaddr = (void *)(bank->ph_addr + offset);
-       *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT;
+       *kaddr = (void __pmem *)addr;
+       *pfn = virt_to_phys(addr) >> PAGE_SHIFT;
 
        return bank->size - offset;
 }
index 2963b56..c3c07d3 100644 (file)
@@ -169,7 +169,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
        unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
        unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
index 93ec906..3280a6b 100644 (file)
@@ -342,6 +342,7 @@ ioremap_cache(phys_addr_t offset, unsigned long size)
 {
        return __ioremap_mode(offset, size, PAGE_KERNEL);
 }
+#define ioremap_cache ioremap_cache
 
 #ifdef CONFIG_HAVE_IOREMAP_PROT
 static inline void __iomem *
index 17f4862..7549186 100644 (file)
@@ -485,7 +485,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
        pg_data_t *pgdat;
        unsigned long start_pfn = PFN_DOWN(start);
@@ -496,7 +496,8 @@ int arch_add_memory(int nid, u64 start, u64 size)
 
        /* We only have ZONE_NORMAL, so this is easy.. */
        ret = __add_pages(nid, pgdat->node_zones +
-                       zone_for_memory(nid, start, size, ZONE_NORMAL),
+                       zone_for_memory(nid, start, size, ZONE_NORMAL,
+                       for_device),
                        start_pfn, nr_pages);
        if (unlikely(ret))
                printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
index 3a14a35..b91d7f1 100644 (file)
@@ -231,8 +231,7 @@ static void pci_parse_of_addrs(struct platform_device *op,
                        res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
                } else if (i == dev->rom_base_reg) {
                        res = &dev->resource[PCI_ROM_RESOURCE];
-                       flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE
-                             | IORESOURCE_SIZEALIGN;
+                       flags |= IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
                } else {
                        printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
                        continue;
index 5bd252e..d4e1fc4 100644 (file)
@@ -863,7 +863,7 @@ void __init mem_init(void)
  * memory to the highmem for now.
  */
 #ifndef CONFIG_NEED_MULTIPLE_NODES
-int arch_add_memory(u64 start, u64 size)
+int arch_add_memory(u64 start, u64 size, bool for_device)
 {
        struct pglist_data *pgdata = &contig_page_data;
        struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
index debafc4..3bb0a29 100644 (file)
 #define __phys_to_virt(x)      ((x) - PHYS_OFFSET + PAGE_OFFSET)
 #endif
 
-/*
- * Convert a physical address to a Page Frame Number and back
- */
-#define        __phys_to_pfn(paddr)    ((paddr) >> PAGE_SHIFT)
-#define        __pfn_to_phys(pfn)      ((pfn) << PAGE_SHIFT)
-
 /*
  * Convert a page to/from a physical address
  */
index 117e2f3..cc0d73e 100644 (file)
@@ -27,7 +27,8 @@ config X86
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_FAST_MULTIPLIER
        select ARCH_HAS_GCOV_PROFILE_ALL
-       select ARCH_HAS_PMEM_API
+       select ARCH_HAS_PMEM_API                if X86_64
+       select ARCH_HAS_MMIO_FLUSH
        select ARCH_HAS_SG_CHAIN
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select ARCH_MIGHT_HAVE_ACPI_PDC         if ACPI
@@ -1450,10 +1451,14 @@ config ILLEGAL_POINTER_VALUE
 
 source "mm/Kconfig"
 
+config X86_PMEM_LEGACY_DEVICE
+       bool
+
 config X86_PMEM_LEGACY
-       bool "Support non-standard NVDIMMs and ADR protected memory"
+       tristate "Support non-standard NVDIMMs and ADR protected memory"
        depends on PHYS_ADDR_T_64BIT
        depends on BLK_DEV
+       select X86_PMEM_LEGACY_DEVICE
        select LIBNVDIMM
        help
          Treat memory marked using the non-standard e820 type of 12 as used
index 9bf3ea1..e63aa38 100644 (file)
@@ -89,6 +89,8 @@ int set_pages_rw(struct page *page, int numpages);
 
 void clflush_cache_range(void *addr, unsigned int size);
 
+#define mmio_flush_range(addr, size) clflush_cache_range(addr, size)
+
 #ifdef CONFIG_DEBUG_RODATA
 void mark_rodata_ro(void);
 extern const int rodata_test_data;
@@ -109,75 +111,4 @@ static inline int rodata_test(void)
 }
 #endif
 
-#ifdef ARCH_HAS_NOCACHE_UACCESS
-
-/**
- * arch_memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent arch_wmb_pmem() can flush cpu and memory controller
- * write buffers to guarantee durability.
- */
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-               size_t n)
-{
-       int unwritten;
-
-       /*
-        * We are copying between two kernel buffers, if
-        * __copy_from_user_inatomic_nocache() returns an error (page
-        * fault) we would have already reported a general protection fault
-        * before the WARN+BUG.
-        */
-       unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
-                       (void __user *) src, n);
-       if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
-                               __func__, dst, src, unwritten))
-               BUG();
-}
-
-/**
- * arch_wmb_pmem - synchronize writes to persistent memory
- *
- * After a series of arch_memcpy_to_pmem() operations this drains data
- * from cpu write buffers and any platform (memory controller) buffers
- * to ensure that written data is durable on persistent memory media.
- */
-static inline void arch_wmb_pmem(void)
-{
-       /*
-        * wmb() to 'sfence' all previous writes such that they are
-        * architecturally visible to 'pcommit'.  Note, that we've
-        * already arranged for pmem writes to avoid the cache via
-        * arch_memcpy_to_pmem().
-        */
-       wmb();
-       pcommit_sfence();
-}
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-#ifdef CONFIG_X86_64
-       /*
-        * We require that wmb() be an 'sfence', that is only guaranteed on
-        * 64-bit builds
-        */
-       return static_cpu_has(X86_FEATURE_PCOMMIT);
-#else
-       return false;
-#endif
-}
-#else /* ARCH_HAS_NOCACHE_UACCESS i.e. ARCH=um */
-extern void arch_memcpy_to_pmem(void __pmem *dst, const void *src, size_t n);
-extern void arch_wmb_pmem(void);
-
-static inline bool __arch_has_wmb_pmem(void)
-{
-       return false;
-}
-#endif
-
 #endif /* _ASM_X86_CACHEFLUSH_H */
index 7cfc085..de25aad 100644 (file)
@@ -250,12 +250,6 @@ static inline void flush_write_buffers(void)
 #endif
 }
 
-static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
-       unsigned long size)
-{
-       return (void __force __pmem *) ioremap_cache(offset, size);
-}
-
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
new file mode 100644 (file)
index 0000000..d8ce3ec
--- /dev/null
@@ -0,0 +1,153 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __ASM_X86_PMEM_H__
+#define __ASM_X86_PMEM_H__
+
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/special_insns.h>
+
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+/**
+ * arch_memcpy_to_pmem - copy data to persistent memory
+ * @dst: destination buffer for the copy
+ * @src: source buffer for the copy
+ * @n: length of the copy in bytes
+ *
+ * Copy data to persistent memory media via non-temporal stores so that
+ * a subsequent arch_wmb_pmem() can flush cpu and memory controller
+ * write buffers to guarantee durability.
+ */
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+               size_t n)
+{
+       int unwritten;
+
+       /*
+        * We are copying between two kernel buffers, if
+        * __copy_from_user_inatomic_nocache() returns an error (page
+        * fault) we would have already reported a general protection fault
+        * before the WARN+BUG.
+        */
+       unwritten = __copy_from_user_inatomic_nocache((void __force *) dst,
+                       (void __user *) src, n);
+       if (WARN(unwritten, "%s: fault copying %p <- %p unwritten: %d\n",
+                               __func__, dst, src, unwritten))
+               BUG();
+}
+
+/**
+ * arch_wmb_pmem - synchronize writes to persistent memory
+ *
+ * After a series of arch_memcpy_to_pmem() operations this drains data
+ * from cpu write buffers and any platform (memory controller) buffers
+ * to ensure that written data is durable on persistent memory media.
+ */
+static inline void arch_wmb_pmem(void)
+{
+       /*
+        * wmb() to 'sfence' all previous writes such that they are
+        * architecturally visible to 'pcommit'.  Note, that we've
+        * already arranged for pmem writes to avoid the cache via
+        * arch_memcpy_to_pmem().
+        */
+       wmb();
+       pcommit_sfence();
+}
+
+/**
+ * __arch_wb_cache_pmem - write back a cache range with CLWB
+ * @vaddr:     virtual start address
+ * @size:      number of bytes to write back
+ *
+ * Write back a cache range using the CLWB (cache line write back)
+ * instruction.  This function requires explicit ordering with an
+ * arch_wmb_pmem() call.  This API is internal to the x86 PMEM implementation.
+ */
+static inline void __arch_wb_cache_pmem(void *vaddr, size_t size)
+{
+       u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
+       unsigned long clflush_mask = x86_clflush_size - 1;
+       void *vend = vaddr + size;
+       void *p;
+
+       for (p = (void *)((unsigned long)vaddr & ~clflush_mask);
+            p < vend; p += x86_clflush_size)
+               clwb(p);
+}
+
+/*
+ * copy_from_iter_nocache() on x86 only uses non-temporal stores for iovec
+ * iterators, so for other types (bvec & kvec) we must do a cache write-back.
+ */
+static inline bool __iter_needs_pmem_wb(struct iov_iter *i)
+{
+       return iter_is_iovec(i) == false;
+}
+
+/**
+ * arch_copy_from_iter_pmem - copy data from an iterator to PMEM
+ * @addr:      PMEM destination address
+ * @bytes:     number of bytes to copy
+ * @i:         iterator with source data
+ *
+ * Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
+ * This function requires explicit ordering with an arch_wmb_pmem() call.
+ */
+static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+               struct iov_iter *i)
+{
+       void *vaddr = (void __force *)addr;
+       size_t len;
+
+       /* TODO: skip the write-back by always using non-temporal stores */
+       len = copy_from_iter_nocache(vaddr, bytes, i);
+
+       if (__iter_needs_pmem_wb(i))
+               __arch_wb_cache_pmem(vaddr, bytes);
+
+       return len;
+}
+
+/**
+ * arch_clear_pmem - zero a PMEM memory range
+ * @addr:      virtual start address
+ * @size:      number of bytes to zero
+ *
+ * Write zeros into the memory range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with an arch_wmb_pmem() call.
+ */
+static inline void arch_clear_pmem(void __pmem *addr, size_t size)
+{
+       void *vaddr = (void __force *)addr;
+
+       /* TODO: implement the zeroing via non-temporal writes */
+       if (size == PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0)
+               clear_page(vaddr);
+       else
+               memset(vaddr, 0, size);
+
+       __arch_wb_cache_pmem(vaddr, size);
+}
+
+static inline bool __arch_has_wmb_pmem(void)
+{
+       /*
+        * We require that wmb() be an 'sfence', that is only guaranteed on
+        * 64-bit builds
+        */
+       return static_cpu_has(X86_FEATURE_PCOMMIT);
+}
+#endif /* CONFIG_ARCH_HAS_PMEM_API */
+#endif /* __ASM_X86_PMEM_H__ */
index 0f457e6..9dafe59 100644 (file)
@@ -37,7 +37,7 @@
 /*
  * This is a non-standardized way to represent ADR or NVDIMM regions that
  * persist over a reboot.  The kernel will ignore their special capabilities
- * unless the CONFIG_X86_PMEM_LEGACY=y option is set.
+ * unless the CONFIG_X86_PMEM_LEGACY option is set.
  *
  * ( Note that older platforms also used 6 for the same type of memory,
  *   but newer versions switched to 12 as 6 was assigned differently.  Some
index 3c36221..9ffdf25 100644 (file)
@@ -94,7 +94,7 @@ obj-$(CONFIG_KVM_GUEST)               += kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT)         += paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
-obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o
+obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)  += pcspeaker.o
 
index 64f90f5..4f00b63 100644 (file)
@@ -3,80 +3,17 @@
  * Copyright (c) 2015, Intel Corporation.
  */
 #include <linux/platform_device.h>
-#include <linux/libnvdimm.h>
 #include <linux/module.h>
-#include <asm/e820.h>
-
-static void e820_pmem_release(struct device *dev)
-{
-       struct nvdimm_bus *nvdimm_bus = dev->platform_data;
-
-       if (nvdimm_bus)
-               nvdimm_bus_unregister(nvdimm_bus);
-}
-
-static struct platform_device e820_pmem = {
-       .name = "e820_pmem",
-       .id = -1,
-       .dev = {
-               .release = e820_pmem_release,
-       },
-};
-
-static const struct attribute_group *e820_pmem_attribute_groups[] = {
-       &nvdimm_bus_attribute_group,
-       NULL,
-};
-
-static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
-       &nd_region_attribute_group,
-       &nd_device_attribute_group,
-       NULL,
-};
 
 static __init int register_e820_pmem(void)
 {
-       static struct nvdimm_bus_descriptor nd_desc;
-       struct device *dev = &e820_pmem.dev;
-       struct nvdimm_bus *nvdimm_bus;
-       int rc, i;
-
-       rc = platform_device_register(&e820_pmem);
-       if (rc)
-               return rc;
-
-       nd_desc.attr_groups = e820_pmem_attribute_groups;
-       nd_desc.provider_name = "e820";
-       nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
-       if (!nvdimm_bus)
-               goto err;
-       dev->platform_data = nvdimm_bus;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-               struct resource res = {
-                       .flags  = IORESOURCE_MEM,
-                       .start  = ei->addr,
-                       .end    = ei->addr + ei->size - 1,
-               };
-               struct nd_region_desc ndr_desc;
-
-               if (ei->type != E820_PRAM)
-                       continue;
-
-               memset(&ndr_desc, 0, sizeof(ndr_desc));
-               ndr_desc.res = &res;
-               ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
-               ndr_desc.numa_node = NUMA_NO_NODE;
-               if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
-                       goto err;
-       }
-
-       return 0;
-
- err:
-       dev_err(dev, "failed to register legacy persistent memory ranges\n");
-       platform_device_unregister(&e820_pmem);
-       return -ENXIO;
+       struct platform_device *pdev;
+
+       /*
+        * See drivers/nvdimm/e820.c for the implementation, this is
+        * simply here to trigger the module to load on demand.
+        */
+       pdev = platform_device_alloc("e820_pmem", -1);
+       return platform_device_add(pdev);
 }
 device_initcall(register_e820_pmem);
index 68aec42..7562f42 100644 (file)
@@ -823,11 +823,11 @@ void __init mem_init(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
        struct pglist_data *pgdata = NODE_DATA(nid);
        struct zone *zone = pgdata->node_zones +
-               zone_for_memory(nid, start, size, ZONE_HIGHMEM);
+               zone_for_memory(nid, start, size, ZONE_HIGHMEM, for_device);
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
 
index 3fba623..30564e2 100644 (file)
@@ -687,11 +687,11 @@ static void  update_end_of_memory_vars(u64 start, u64 size)
  * Memory is added always to NORMAL zone. This means you will never get
  * additional DMA/DMA32 memory.
  */
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct zone *zone = pgdat->node_zones +
-               zone_for_memory(nid, start, size, ZONE_NORMAL);
+               zone_for_memory(nid, start, size, ZONE_NORMAL, for_device);
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
        int ret;
index c39bb6e..867840f 100644 (file)
@@ -57,6 +57,7 @@ static inline void __iomem *ioremap_cache(unsigned long offset,
        else
                BUG();
 }
+#define ioremap_cache ioremap_cache
 
 #define ioremap_wc ioremap_nocache
 #define ioremap_wt ioremap_nocache
index 54e9729..5d1015c 100644 (file)
@@ -417,6 +417,7 @@ config ACPI_NFIT
        tristate "ACPI NVDIMM Firmware Interface Table (NFIT)"
        depends on PHYS_ADDR_T_64BIT
        depends on BLK_DEV
+       depends on ARCH_HAS_MMIO_FLUSH
        select LIBNVDIMM
        help
          Infrastructure to probe ACPI 6 compliant platforms for
index cf0fd96..c1b8d03 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/sort.h>
 #include <linux/pmem.h>
 #include <linux/io.h>
+#include <asm/cacheflush.h>
 #include "nfit.h"
 
 /*
@@ -764,9 +765,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
        struct acpi_device *adev, *adev_dimm;
        struct device *dev = acpi_desc->dev;
        const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM);
-       unsigned long long sta;
-       int i, rc = -ENODEV;
-       acpi_status status;
+       int i;
 
        nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en;
        adev = to_acpi_dev(acpi_desc);
@@ -781,25 +780,11 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
                return force_enable_dimms ? 0 : -ENODEV;
        }
 
-       status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta);
-       if (status == AE_NOT_FOUND) {
-               dev_dbg(dev, "%s missing _STA, assuming enabled...\n",
-                               dev_name(&adev_dimm->dev));
-               rc = 0;
-       } else if (ACPI_FAILURE(status))
-               dev_err(dev, "%s failed to retrieve_STA, disabling...\n",
-                               dev_name(&adev_dimm->dev));
-       else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0)
-               dev_info(dev, "%s disabled by firmware\n",
-                               dev_name(&adev_dimm->dev));
-       else
-               rc = 0;
-
        for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++)
                if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
                        set_bit(i, &nfit_mem->dsm_mask);
 
-       return force_enable_dimms ? 0 : rc;
+       return 0;
 }
 
 static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
@@ -868,6 +853,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
        struct acpi_device *adev;
        int i;
 
+       nd_desc->dsm_mask = acpi_desc->bus_dsm_force_en;
        adev = to_acpi_dev(acpi_desc);
        if (!adev)
                return;
@@ -1032,7 +1018,7 @@ static u32 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw)
        if (mmio->num_lines)
                offset = to_interleave_offset(offset, mmio);
 
-       return readl(mmio->base + offset);
+       return readl(mmio->addr.base + offset);
 }
 
 static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
@@ -1057,11 +1043,11 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
        if (mmio->num_lines)
                offset = to_interleave_offset(offset, mmio);
 
-       writeq(cmd, mmio->base + offset);
+       writeq(cmd, mmio->addr.base + offset);
        wmb_blk(nfit_blk);
 
        if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH)
-               readq(mmio->base + offset);
+               readq(mmio->addr.base + offset);
 }
 
 static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
@@ -1093,11 +1079,16 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
                }
 
                if (rw)
-                       memcpy_to_pmem(mmio->aperture + offset,
+                       memcpy_to_pmem(mmio->addr.aperture + offset,
                                        iobuf + copied, c);
-               else
+               else {
+                       if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH)
+                               mmio_flush_range((void __force *)
+                                       mmio->addr.aperture + offset, c);
+
                        memcpy_from_pmem(iobuf + copied,
-                                       mmio->aperture + offset, c);
+                                       mmio->addr.aperture + offset, c);
+               }
 
                copied += c;
                len -= c;
@@ -1144,7 +1135,10 @@ static void nfit_spa_mapping_release(struct kref *kref)
 
        WARN_ON(!mutex_is_locked(&acpi_desc->spa_map_mutex));
        dev_dbg(acpi_desc->dev, "%s: SPA%d\n", __func__, spa->range_index);
-       iounmap(spa_map->iomem);
+       if (spa_map->type == SPA_MAP_APERTURE)
+               memunmap((void __force *)spa_map->addr.aperture);
+       else
+               iounmap(spa_map->addr.base);
        release_mem_region(spa->address, spa->length);
        list_del(&spa_map->list);
        kfree(spa_map);
@@ -1190,7 +1184,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
        spa_map = find_spa_mapping(acpi_desc, spa);
        if (spa_map) {
                kref_get(&spa_map->kref);
-               return spa_map->iomem;
+               return spa_map->addr.base;
        }
 
        spa_map = kzalloc(sizeof(*spa_map), GFP_KERNEL);
@@ -1206,20 +1200,19 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc,
        if (!res)
                goto err_mem;
 
-       if (type == SPA_MAP_APERTURE) {
-               /*
-                * TODO: memremap_pmem() support, but that requires cache
-                * flushing when the aperture is moved.
-                */
-               spa_map->iomem = ioremap_wc(start, n);
-       } else
-               spa_map->iomem = ioremap_nocache(start, n);
+       spa_map->type = type;
+       if (type == SPA_MAP_APERTURE)
+               spa_map->addr.aperture = (void __pmem *)memremap(start, n,
+                                                       ARCH_MEMREMAP_PMEM);
+       else
+               spa_map->addr.base = ioremap_nocache(start, n);
+
 
-       if (!spa_map->iomem)
+       if (!spa_map->addr.base)
                goto err_map;
 
        list_add_tail(&spa_map->list, &acpi_desc->spa_maps);
-       return spa_map->iomem;
+       return spa_map->addr.base;
 
  err_map:
        release_mem_region(start, n);
@@ -1282,7 +1275,7 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
                nfit_blk->dimm_flags = flags.flags;
        else if (rc == -ENOTTY) {
                /* fall back to a conservative default */
-               nfit_blk->dimm_flags = ND_BLK_DCR_LATCH;
+               nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH;
                rc = 0;
        } else
                rc = -ENXIO;
@@ -1322,9 +1315,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        /* map block aperture memory */
        nfit_blk->bdw_offset = nfit_mem->bdw->offset;
        mmio = &nfit_blk->mmio[BDW];
-       mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
+       mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw,
                        SPA_MAP_APERTURE);
-       if (!mmio->base) {
+       if (!mmio->addr.base) {
                dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
                                nvdimm_name(nvdimm));
                return -ENOMEM;
@@ -1345,9 +1338,9 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
        nfit_blk->cmd_offset = nfit_mem->dcr->command_offset;
        nfit_blk->stat_offset = nfit_mem->dcr->status_offset;
        mmio = &nfit_blk->mmio[DCR];
-       mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
+       mmio->addr.base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr,
                        SPA_MAP_CONTROL);
-       if (!mmio->base) {
+       if (!mmio->addr.base) {
                dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
                                nvdimm_name(nvdimm));
                return -ENOMEM;
@@ -1379,7 +1372,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
                        return -ENOMEM;
        }
 
-       if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush)
+       if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush)
                dev_warn(dev, "unable to guarantee persistence of writes\n");
 
        if (mmio->line_size == 0)
@@ -1414,7 +1407,7 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
        for (i = 0; i < 2; i++) {
                struct nfit_blk_mmio *mmio = &nfit_blk->mmio[i];
 
-               if (mmio->base)
+               if (mmio->addr.base)
                        nfit_spa_unmap(acpi_desc, mmio->spa);
        }
        nd_blk_region_set_provider_data(ndbr, NULL);
index 79b6d83..7e74015 100644 (file)
@@ -41,6 +41,7 @@ enum nfit_uuids {
 };
 
 enum {
+       ND_BLK_READ_FLUSH = 1, /* flush aperture range before reads */
        ND_BLK_DCR_LATCH = 2,
 };
 
@@ -107,6 +108,7 @@ struct acpi_nfit_desc {
        struct nvdimm_bus *nvdimm_bus;
        struct device *dev;
        unsigned long dimm_dsm_force_en;
+       unsigned long bus_dsm_force_en;
        int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
                        void *iobuf, u64 len, int rw);
 };
@@ -116,12 +118,16 @@ enum nd_blk_mmio_selector {
        DCR,
 };
 
+struct nd_blk_addr {
+       union { /* two typed views of one mapping pointer */
+               void __iomem *base; /* ioremap_nocache() mapping */
+               void __pmem  *aperture; /* memremap() mapping */
+       };
+};
+
 struct nfit_blk {
        struct nfit_blk_mmio {
-               union {
-                       void __iomem *base;
-                       void __pmem  *aperture;
-               };
+               struct nd_blk_addr addr;
                u64 size;
                u64 base_offset;
                u32 line_size;
@@ -148,7 +154,8 @@ struct nfit_spa_mapping {
        struct acpi_nfit_system_address *spa;
        struct list_head list;
        struct kref kref;
-       void __iomem *iomem;
+       enum spa_map_type type;
+       struct nd_blk_addr addr;
 };
 
 static inline struct nfit_spa_mapping *to_spa_map(struct kref *kref)
index f9ab745..b9794ae 100644 (file)
@@ -374,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-                       void **kaddr, unsigned long *pfn, long size)
+                       void __pmem **kaddr, unsigned long *pfn)
 {
        struct brd_device *brd = bdev->bd_disk->private_data;
        struct page *page;
@@ -384,13 +384,9 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
        page = brd_insert_page(brd, sector);
        if (!page)
                return -ENOSPC;
-       *kaddr = page_address(page);
+       *kaddr = (void __pmem *)page_address(page);
        *pfn = page_to_pfn(page);
 
-       /*
-        * TODO: If size > PAGE_SIZE, we could look to see if the next page in
-        * the file happens to be mapped to the next page of physical RAM.
-        */
        return PAGE_SIZE;
 }
 #else
index b713466..f8f2e76 100644 (file)
@@ -38,7 +38,7 @@ typedef struct icn_cdef {
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/major.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
index 2fc4957..a70eb83 100644 (file)
@@ -41,7 +41,7 @@
 #include <linux/fs.h>
 #include <linux/ioctl.h>
 #include <linux/init.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <linux/mtd/mtd.h>
 
index 7da266a..0802158 100644 (file)
@@ -24,7 +24,7 @@
 #include <linux/rslib.h>
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
index 32a216d..ab7bda0 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/onenand.h>
 #include <linux/mtd/partitions.h>
-#include <asm/io.h>
+#include <linux/io.h>
 
 /*
  * Note: Driver name and platform data format have been updated!
index 72226ac..53c1162 100644 (file)
@@ -21,6 +21,7 @@ config BLK_DEV_PMEM
        default LIBNVDIMM
        depends on HAS_IOMEM
        select ND_BTT if BTT
+       select ND_PFN if NVDIMM_PFN
        help
          Memory ranges for PMEM are described by either an NFIT
          (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a
@@ -47,12 +48,16 @@ config ND_BLK
          (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
          capabilities.
 
+config ND_CLAIM
+       bool
+
 config ND_BTT
        tristate
 
 config BTT
        bool "BTT: Block Translation Table (atomic sector updates)"
        default y if LIBNVDIMM
+       select ND_CLAIM
        help
          The Block Translation Table (BTT) provides atomic sector
          update semantics for persistent memory devices, so that
@@ -65,4 +70,22 @@ config BTT
 
          Select Y if unsure
 
+config ND_PFN
+       tristate
+
+config NVDIMM_PFN
+       bool "PFN: Map persistent (device) memory"
+       default LIBNVDIMM
+       depends on ZONE_DEVICE
+       select ND_CLAIM
+       help
+         Map persistent memory, i.e. advertise it to the memory
+         management sub-system.  By default persistent memory does
+         not support direct I/O, RDMA, or any other usage that
+         requires a 'struct page' to mediate an I/O request.  This
+         driver allocates and initializes the infrastructure needed
+         to support those use cases.
+
+         Select Y if unsure
+
 endif
index 594bb97..ea84d3c 100644 (file)
@@ -2,6 +2,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 
 nd_pmem-y := pmem.o
 
@@ -9,6 +10,8 @@ nd_btt-y := btt.o
 
 nd_blk-y := blk.o
 
+nd_e820-y := e820.o
+
 libnvdimm-y := core.o
 libnvdimm-y += bus.o
 libnvdimm-y += dimm_devs.o
@@ -17,4 +20,6 @@ libnvdimm-y += region_devs.o
 libnvdimm-y += region.o
 libnvdimm-y += namespace_devs.o
 libnvdimm-y += label.o
+libnvdimm-$(CONFIG_ND_CLAIM) += claim.o
 libnvdimm-$(CONFIG_BTT) += btt_devs.o
+libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
index 341202e..2542397 100644 (file)
@@ -582,33 +582,6 @@ static void free_arenas(struct btt *btt)
        }
 }
 
-/*
- * This function checks if the metadata layout is valid and error free
- */
-static int arena_is_valid(struct arena_info *arena, struct btt_sb *super,
-                               u8 *uuid, u32 lbasize)
-{
-       u64 checksum;
-
-       if (memcmp(super->uuid, uuid, 16))
-               return 0;
-
-       checksum = le64_to_cpu(super->checksum);
-       super->checksum = 0;
-       if (checksum != nd_btt_sb_checksum(super))
-               return 0;
-       super->checksum = cpu_to_le64(checksum);
-
-       if (lbasize != le32_to_cpu(super->external_lbasize))
-               return 0;
-
-       /* TODO: figure out action for this */
-       if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0)
-               dev_info(to_dev(arena), "Found arena with an error flag\n");
-
-       return 1;
-}
-
 /*
  * This function reads an existing valid btt superblock and
  * populates the corresponding arena_info struct
@@ -632,8 +605,9 @@ static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
        arena->logoff = arena_off + le64_to_cpu(super->logoff);
        arena->info2off = arena_off + le64_to_cpu(super->info2off);
 
-       arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) :
-                       (arena->info2off - arena->infooff + BTT_PG_SIZE);
+       arena->size = (le64_to_cpu(super->nextoff) > 0)
+               ? (le64_to_cpu(super->nextoff))
+               : (arena->info2off - arena->infooff + BTT_PG_SIZE);
 
        arena->flags = le32_to_cpu(super->flags);
 }
@@ -665,8 +639,7 @@ static int discover_arenas(struct btt *btt)
                if (ret)
                        goto out;
 
-               if (!arena_is_valid(arena, super, btt->nd_btt->uuid,
-                               btt->lbasize)) {
+               if (!nd_btt_arena_is_valid(btt->nd_btt, super)) {
                        if (remaining == btt->rawsize) {
                                btt->init_state = INIT_NOTFOUND;
                                dev_info(to_dev(arena), "No existing arenas\n");
@@ -755,10 +728,13 @@ static int create_arenas(struct btt *btt)
  * It is only called for an uninitialized arena when a write
  * to that arena occurs for the first time.
  */
-static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
+static int btt_arena_write_layout(struct arena_info *arena)
 {
        int ret;
+       u64 sum;
        struct btt_sb *super;
+       struct nd_btt *nd_btt = arena->nd_btt;
+       const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
 
        ret = btt_map_init(arena);
        if (ret)
@@ -773,7 +749,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
                return -ENOMEM;
 
        strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
-       memcpy(super->uuid, uuid, 16);
+       memcpy(super->uuid, nd_btt->uuid, 16);
+       memcpy(super->parent_uuid, parent_uuid, 16);
        super->flags = cpu_to_le32(arena->flags);
        super->version_major = cpu_to_le16(arena->version_major);
        super->version_minor = cpu_to_le16(arena->version_minor);
@@ -794,7 +771,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
        super->info2off = cpu_to_le64(arena->info2off - arena->infooff);
 
        super->flags = 0;
-       super->checksum = cpu_to_le64(nd_btt_sb_checksum(super));
+       sum = nd_sb_checksum((struct nd_gen_sb *) super);
+       super->checksum = cpu_to_le64(sum);
 
        ret = btt_info_write(arena, super);
 
@@ -813,7 +791,7 @@ static int btt_meta_init(struct btt *btt)
 
        mutex_lock(&btt->init_lock);
        list_for_each_entry(arena, &btt->arena_list, list) {
-               ret = btt_arena_write_layout(arena, btt->nd_btt->uuid);
+               ret = btt_arena_write_layout(arena);
                if (ret)
                        goto unlock;
 
@@ -1447,8 +1425,6 @@ static int __init nd_btt_init(void)
 {
        int rc;
 
-       BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K);
-
        btt_major = register_blkdev(0, "btt");
        if (btt_major < 0)
                return btt_major;
index 75b0d80..b2f8651 100644 (file)
@@ -182,4 +182,7 @@ struct btt {
        int init_state;
        int num_arenas;
 };
+
+bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
+
 #endif
index 6ac8c0f..59ad54a 100644 (file)
 #include "btt.h"
 #include "nd.h"
 
-static void __nd_btt_detach_ndns(struct nd_btt *nd_btt)
-{
-       struct nd_namespace_common *ndns = nd_btt->ndns;
-
-       dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
-                       || ndns->claim != &nd_btt->dev,
-                       "%s: invalid claim\n", __func__);
-       ndns->claim = NULL;
-       nd_btt->ndns = NULL;
-       put_device(&ndns->dev);
-}
-
-static void nd_btt_detach_ndns(struct nd_btt *nd_btt)
-{
-       struct nd_namespace_common *ndns = nd_btt->ndns;
-
-       if (!ndns)
-               return;
-       get_device(&ndns->dev);
-       device_lock(&ndns->dev);
-       __nd_btt_detach_ndns(nd_btt);
-       device_unlock(&ndns->dev);
-       put_device(&ndns->dev);
-}
-
-static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt,
-               struct nd_namespace_common *ndns)
-{
-       if (ndns->claim)
-               return false;
-       dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
-                       || nd_btt->ndns,
-                       "%s: invalid claim\n", __func__);
-       ndns->claim = &nd_btt->dev;
-       nd_btt->ndns = ndns;
-       get_device(&ndns->dev);
-       return true;
-}
-
-static bool nd_btt_attach_ndns(struct nd_btt *nd_btt,
-               struct nd_namespace_common *ndns)
-{
-       bool claimed;
-
-       device_lock(&ndns->dev);
-       claimed = __nd_btt_attach_ndns(nd_btt, ndns);
-       device_unlock(&ndns->dev);
-       return claimed;
-}
-
 static void nd_btt_release(struct device *dev)
 {
        struct nd_region *nd_region = to_nd_region(dev->parent);
        struct nd_btt *nd_btt = to_nd_btt(dev);
 
        dev_dbg(dev, "%s\n", __func__);
-       nd_btt_detach_ndns(nd_btt);
+       nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns);
        ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
        kfree(nd_btt->uuid);
        kfree(nd_btt);
@@ -172,104 +122,15 @@ static ssize_t namespace_show(struct device *dev,
        return rc;
 }
 
-static int namespace_match(struct device *dev, void *data)
-{
-       char *name = data;
-
-       return strcmp(name, dev_name(dev)) == 0;
-}
-
-static bool is_nd_btt_idle(struct device *dev)
-{
-       struct nd_region *nd_region = to_nd_region(dev->parent);
-       struct nd_btt *nd_btt = to_nd_btt(dev);
-
-       if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver)
-               return false;
-       return true;
-}
-
-static ssize_t __namespace_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t len)
-{
-       struct nd_btt *nd_btt = to_nd_btt(dev);
-       struct nd_namespace_common *ndns;
-       struct device *found;
-       char *name;
-
-       if (dev->driver) {
-               dev_dbg(dev, "%s: -EBUSY\n", __func__);
-               return -EBUSY;
-       }
-
-       name = kstrndup(buf, len, GFP_KERNEL);
-       if (!name)
-               return -ENOMEM;
-       strim(name);
-
-       if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
-               /* pass */;
-       else {
-               len = -EINVAL;
-               goto out;
-       }
-
-       ndns = nd_btt->ndns;
-       if (strcmp(name, "") == 0) {
-               /* detach the namespace and destroy / reset the btt device */
-               nd_btt_detach_ndns(nd_btt);
-               if (is_nd_btt_idle(dev))
-                       nd_device_unregister(dev, ND_ASYNC);
-               else {
-                       nd_btt->lbasize = 0;
-                       kfree(nd_btt->uuid);
-                       nd_btt->uuid = NULL;
-               }
-               goto out;
-       } else if (ndns) {
-               dev_dbg(dev, "namespace already set to: %s\n",
-                               dev_name(&ndns->dev));
-               len = -EBUSY;
-               goto out;
-       }
-
-       found = device_find_child(dev->parent, name, namespace_match);
-       if (!found) {
-               dev_dbg(dev, "'%s' not found under %s\n", name,
-                               dev_name(dev->parent));
-               len = -ENODEV;
-               goto out;
-       }
-
-       ndns = to_ndns(found);
-       if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
-               dev_dbg(dev, "%s too small to host btt\n", name);
-               len = -ENXIO;
-               goto out_attach;
-       }
-
-       WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
-       if (!nd_btt_attach_ndns(nd_btt, ndns)) {
-               dev_dbg(dev, "%s already claimed\n",
-                               dev_name(&ndns->dev));
-               len = -EBUSY;
-       }
-
- out_attach:
-       put_device(&ndns->dev); /* from device_find_child */
- out:
-       kfree(name);
-       return len;
-}
-
 static ssize_t namespace_store(struct device *dev,
                struct device_attribute *attr, const char *buf, size_t len)
 {
+       struct nd_btt *nd_btt = to_nd_btt(dev);
        ssize_t rc;
 
        nvdimm_bus_lock(dev);
        device_lock(dev);
-       rc = __namespace_store(dev, attr, buf, len);
+       rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
        dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
                        rc, buf, buf[len - 1] == '\n' ? "" : "\n");
        device_unlock(dev);
@@ -324,7 +185,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
        dev->type = &nd_btt_device_type;
        dev->groups = nd_btt_attribute_groups;
        device_initialize(&nd_btt->dev);
-       if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) {
+       if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
                dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
                                __func__, dev_name(ndns->claim));
                put_device(dev);
@@ -342,30 +203,54 @@ struct device *nd_btt_create(struct nd_region *nd_region)
        return dev;
 }
 
-/*
- * nd_btt_sb_checksum: compute checksum for btt info block
+static bool uuid_is_null(u8 *uuid)
+{
+       static const u8 null_uuid[16]; /* static storage: all zeroes */
+
+       return (memcmp(uuid, null_uuid, 16) == 0); /* true if all 16 are 0 */
+}
+
+/**
+ * nd_btt_arena_is_valid - check if the metadata layout is valid
+ * @nd_btt:    device with BTT geometry and backing device info
+ * @super:     pointer to the arena's info block being tested
+ *
+ * Check consistency of the btt info block with itself by validating
+ * the signature and checksum, and with the parent namespace by comparing
+ * a non-null parent_uuid in the info block against the namespace uuid.
  *
- * Returns a fletcher64 checksum of everything in the given info block
- * except the last field (since that's where the checksum lives).
+ * Returns:
+ * false for an invalid info block, true for a valid one
  */
-u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
+bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
 {
-       u64 sum;
-       __le64 sum_save;
-
-       sum_save = btt_sb->checksum;
-       btt_sb->checksum = 0;
-       sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
-       btt_sb->checksum = sum_save;
-       return sum;
+       const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+       u64 checksum;
+
+       if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
+               return false;
+
+       if (!uuid_is_null(super->parent_uuid)) /* all-zero skips this */
+               if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
+                       return false;
+
+       checksum = le64_to_cpu(super->checksum);
+       super->checksum = 0;
+       if (checksum != nd_sb_checksum((struct nd_gen_sb *) super))
+               return false; /* NOTE(review): super->checksum stays 0 */
+       super->checksum = cpu_to_le64(checksum);
+
+       /* TODO: figure out action for this */
+       if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0)
+               dev_info(&nd_btt->dev, "Found arena with an error flag\n");
+
+       return true;
 }
-EXPORT_SYMBOL(nd_btt_sb_checksum);
+EXPORT_SYMBOL(nd_btt_arena_is_valid);
 
 static int __nd_btt_probe(struct nd_btt *nd_btt,
                struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
 {
-       u64 checksum;
-
        if (!btt_sb || !ndns || !nd_btt)
                return -ENODEV;
 
@@ -375,14 +260,8 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
        if (nvdimm_namespace_capacity(ndns) < SZ_16M)
                return -ENXIO;
 
-       if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
-               return -ENODEV;
-
-       checksum = le64_to_cpu(btt_sb->checksum);
-       btt_sb->checksum = 0;
-       if (checksum != nd_btt_sb_checksum(btt_sb))
+       if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
                return -ENODEV;
-       btt_sb->checksum = cpu_to_le64(checksum);
 
        nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
        nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
@@ -416,7 +295,9 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
        dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__,
                        rc == 0 ? dev_name(dev) : "<none>");
        if (rc < 0) {
-               __nd_btt_detach_ndns(to_nd_btt(dev));
+               struct nd_btt *nd_btt = to_nd_btt(dev);
+
+               __nd_detach_ndns(dev, &nd_btt->ndns);
                put_device(dev);
        }
 
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
new file mode 100644 (file)
index 0000000..e8f03b0
--- /dev/null
@@ -0,0 +1,201 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/sizes.h>
+#include "nd-core.h"
+#include "pfn.h"
+#include "btt.h"
+#include "nd.h"
+
+void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns)
+{
+       struct nd_namespace_common *ndns = *_ndns; /* must be non-NULL */
+
+       dev_WARN_ONCE(dev, !mutex_is_locked(&ndns->dev.mutex)
+                       || ndns->claim != dev,
+                       "%s: invalid claim\n", __func__); /* warn, continue */
+       ndns->claim = NULL; /* namespace is no longer claimed by dev */
+       *_ndns = NULL; /* clear the pointer held by the caller */
+       put_device(&ndns->dev); /* ref from __nd_attach_ndns() */
+}
+
+void nd_detach_ndns(struct device *dev,
+               struct nd_namespace_common **_ndns)
+{
+       struct nd_namespace_common *ndns = *_ndns;
+
+       if (!ndns)
+               return; /* nothing attached */
+       get_device(&ndns->dev); /* hold a ref across the put in the helper */
+       device_lock(&ndns->dev); /* helper warns if this is not held */
+       __nd_detach_ndns(dev, _ndns);
+       device_unlock(&ndns->dev);
+       put_device(&ndns->dev); /* drop the ref taken above */
+}
+
+bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+               struct nd_namespace_common **_ndns)
+{
+       if (attach->claim)
+               return false; /* namespace already has a claimant */
+       dev_WARN_ONCE(dev, !mutex_is_locked(&attach->dev.mutex)
+                       || *_ndns, /* dev already points at a namespace */
+                       "%s: invalid claim\n", __func__);
+       attach->claim = dev; /* record dev as the claimant */
+       *_ndns = attach;
+       get_device(&attach->dev); /* dropped by __nd_detach_ndns() */
+       return true;
+}
+
+bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+               struct nd_namespace_common **_ndns)
+{
+       bool claimed;
+
+       device_lock(&attach->dev); /* helper warns if this is not held */
+       claimed = __nd_attach_ndns(dev, attach, _ndns);
+       device_unlock(&attach->dev);
+       return claimed; /* false: attach was already claimed */
+}
+
+static int namespace_match(struct device *dev, void *data)
+{
+       char *name = data; /* name handed to device_find_child() */
+
+       return strcmp(name, dev_name(dev)) == 0; /* 1 on exact match */
+}
+
+static bool is_idle(struct device *dev, struct nd_namespace_common *ndns)
+{
+       struct nd_region *nd_region = to_nd_region(dev->parent);
+       struct device *seed = NULL; /* stays NULL for other device types */
+
+       if (is_nd_btt(dev))
+               seed = nd_region->btt_seed;
+       else if (is_nd_pfn(dev))
+               seed = nd_region->pfn_seed;
+
+       if (seed == dev || ndns || dev->driver)
+               return false; /* is the seed, has a namespace, or is bound */
+       return true;
+}
+
+static void nd_detach_and_reset(struct device *dev,
+               struct nd_namespace_common **_ndns)
+{
+       /* detach the namespace, then unregister dev if idle, else reset it */
+       nd_detach_ndns(dev, _ndns);
+       if (is_idle(dev, *_ndns)) { /* *_ndns is NULL by now */
+               nd_device_unregister(dev, ND_ASYNC);
+       } else if (is_nd_btt(dev)) {
+               struct nd_btt *nd_btt = to_nd_btt(dev);
+
+               nd_btt->lbasize = 0; /* clear the btt settings */
+               kfree(nd_btt->uuid);
+               nd_btt->uuid = NULL; /* no dangling pointer after kfree */
+       } else if (is_nd_pfn(dev)) {
+               struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+               kfree(nd_pfn->uuid);
+               nd_pfn->uuid = NULL; /* no dangling pointer after kfree */
+               nd_pfn->mode = PFN_MODE_NONE; /* clear the pfn settings */
+       }
+}
+
+ssize_t nd_namespace_store(struct device *dev,
+               struct nd_namespace_common **_ndns, const char *buf,
+               size_t len)
+{
+       struct nd_namespace_common *ndns;
+       struct device *found;
+       char *name;
+
+       if (dev->driver) {
+               dev_dbg(dev, "%s: -EBUSY\n", __func__);
+               return -EBUSY; /* no changes while a driver is bound */
+       }
+
+       name = kstrndup(buf, len, GFP_KERNEL);
+       if (!name)
+               return -ENOMEM;
+       strim(name); /* trims trailing whitespace in place */
+
+       if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
+               /* pass */;
+       else {
+               len = -EINVAL; /* -errno carried in size_t len */
+               goto out;
+       }
+
+       ndns = *_ndns;
+       if (strcmp(name, "") == 0) {
+               nd_detach_and_reset(dev, _ndns); /* empty name: detach */
+               goto out;
+       } else if (ndns) {
+               dev_dbg(dev, "namespace already set to: %s\n",
+                               dev_name(&ndns->dev));
+               len = -EBUSY;
+               goto out;
+       }
+
+       found = device_find_child(dev->parent, name, namespace_match);
+       if (!found) {
+               dev_dbg(dev, "'%s' not found under %s\n", name,
+                               dev_name(dev->parent));
+               len = -ENODEV;
+               goto out;
+       }
+
+       ndns = to_ndns(found);
+       if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
+               dev_dbg(dev, "%s too small to host\n", name);
+               len = -ENXIO;
+               goto out_attach; /* still drop the lookup reference */
+       }
+
+       WARN_ON_ONCE(!is_nvdimm_bus_locked(dev));
+       if (!nd_attach_ndns(dev, ndns, _ndns)) {
+               dev_dbg(dev, "%s already claimed\n",
+                               dev_name(&ndns->dev));
+               len = -EBUSY;
+       }
+
+ out_attach:
+       put_device(&ndns->dev); /* from device_find_child */
+ out:
+       kfree(name);
+       return len; /* len unchanged on success, else -errno */
+}
+
+/*
+ * nd_sb_checksum: compute checksum for a generic info block
+ *
+ * Returns the nd_fletcher64() result over the whole info block, computed
+ * with the checksum field zeroed; the stored checksum is then restored.
+ */
+u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb)
+{
+       u64 sum;
+       __le64 sum_save;
+
+       BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K);
+       BUILD_BUG_ON(sizeof(struct nd_pfn_sb) != SZ_4K);
+       BUILD_BUG_ON(sizeof(struct nd_gen_sb) != SZ_4K);
+
+       sum_save = nd_gen_sb->checksum; /* saved so it can be put back */
+       nd_gen_sb->checksum = 0;
+       sum = nd_fletcher64(nd_gen_sb, sizeof(*nd_gen_sb), 1);
+       nd_gen_sb->checksum = sum_save;
+       return sum;
+}
+EXPORT_SYMBOL(nd_sb_checksum);
index c05eb80..651b8d1 100644 (file)
@@ -241,10 +241,7 @@ void nvdimm_drvdata_release(struct kref *kref)
                nvdimm_free_dpa(ndd, res);
        nvdimm_bus_unlock(dev);
 
-       if (ndd->data && is_vmalloc_addr(ndd->data))
-               vfree(ndd->data);
-       else
-               kfree(ndd->data);
+       kvfree(ndd->data);
        kfree(ndd);
        put_device(dev);
 }
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
new file mode 100644 (file)
index 0000000..8282db2
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015, Christoph Hellwig.
+ * Copyright (c) 2015, Intel Corporation.
+ */
+#include <linux/platform_device.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+
+static const struct attribute_group *e820_pmem_attribute_groups[] = {
+       &nvdimm_bus_attribute_group,
+       NULL,
+};
+
+static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
+       &nd_region_attribute_group,
+       &nd_device_attribute_group,
+       NULL,
+};
+
+/* Platform remove hook: unregister the bus that probe stored in drvdata. */
+static int e820_pmem_remove(struct platform_device *pdev)
+{
+       nvdimm_bus_unregister(platform_get_drvdata(pdev));
+
+       return 0;
+}
+
+/*
+ * Platform probe hook: register an nvdimm bus for this device and create
+ * one pmem region for every direct child of iomem_resource whose name
+ * starts with "Persistent Memory (legacy)".
+ *
+ * Returns 0 on success.  If the bus or any region cannot be registered
+ * the bus is unregistered again and -ENXIO is returned.
+ */
+static int e820_pmem_probe(struct platform_device *pdev)
+{
+       static struct nvdimm_bus_descriptor nd_desc;
+       struct device *dev = &pdev->dev;
+       struct nvdimm_bus *bus;
+       struct resource *res;
+
+       nd_desc.attr_groups = e820_pmem_attribute_groups;
+       nd_desc.provider_name = "e820";
+       bus = nvdimm_bus_register(dev, &nd_desc);
+       if (!bus)
+               goto err;
+       platform_set_drvdata(pdev, bus);
+
+       for (res = iomem_resource.child; res; res = res->sibling) {
+               struct nd_region_desc ndr_desc;
+
+               /* skip everything that is not a legacy pmem range */
+               if (strncmp(res->name, "Persistent Memory (legacy)", 26))
+                       continue;
+
+               memset(&ndr_desc, 0, sizeof(ndr_desc));
+               ndr_desc.numa_node = NUMA_NO_NODE;
+               ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
+               ndr_desc.res = res;
+               set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+               if (!nvdimm_pmem_region_create(bus, &ndr_desc))
+                       goto err;
+       }
+
+       return 0;
+
+ err:
+       nvdimm_bus_unregister(bus);
+       dev_err(dev, "failed to register legacy persistent memory ranges\n");
+       return -ENXIO;
+}
+
+static struct platform_driver e820_pmem_driver = {
+       .probe = e820_pmem_probe,
+       .remove = e820_pmem_remove,
+       .driver = {
+               .name = "e820_pmem",
+       },
+};
+
+/*
+ * module_platform_driver() generates the module init/exit pair that
+ * registers and unregisters e820_pmem_driver, replacing the equivalent
+ * hand-written boilerplate.
+ */
+module_platform_driver(e820_pmem_driver);
+MODULE_ALIAS("platform:e820_pmem*");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
index fef0dd8..0955b2c 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/slab.h>
+#include <linux/pmem.h>
 #include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"
@@ -76,22 +77,54 @@ static bool is_namespace_io(struct device *dev)
        return dev ? dev->type == &namespace_io_device_type : false;
 }
 
+/*
+ * pmem_should_map_pages: decide whether @dev's memory gets struct pages
+ *
+ * Returns true only when CONFIG_ZONE_DEVICE is enabled, the parent
+ * region carries ND_REGION_PAGEMAP, @dev is neither a pfn nor a btt
+ * device, and the architecture defines ARCH_MEMREMAP_PMEM as
+ * MEMREMAP_WB.
+ */
+bool pmem_should_map_pages(struct device *dev)
+{
+       struct nd_region *region = to_nd_region(dev->parent);
+
+       if (!IS_ENABLED(CONFIG_ZONE_DEVICE)
+                       || !test_bit(ND_REGION_PAGEMAP, &region->flags)
+                       || is_nd_pfn(dev) || is_nd_btt(dev))
+               return false;
+
+#ifndef ARCH_MEMREMAP_PMEM
+       return false;
+#else
+       return ARCH_MEMREMAP_PMEM == MEMREMAP_WB;
+#endif
+}
+EXPORT_SYMBOL(pmem_should_map_pages);
+
 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
                char *name)
 {
        struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
-       const char *suffix = "";
+       const char *suffix = NULL;
 
-       if (ndns->claim && is_nd_btt(ndns->claim))
-               suffix = "s";
+       if (ndns->claim) {
+               if (is_nd_btt(ndns->claim))
+                       suffix = "s";
+               else if (is_nd_pfn(ndns->claim))
+                       suffix = "m";
+               else
+                       dev_WARN_ONCE(&ndns->dev, 1,
+                                       "unknown claim type by %s\n",
+                                       dev_name(ndns->claim));
+       }
 
-       if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev))
-               sprintf(name, "pmem%d%s", nd_region->id, suffix);
-       else if (is_namespace_blk(&ndns->dev)) {
+       if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) {
+               if (!suffix && pmem_should_map_pages(&ndns->dev))
+                       suffix = "m";
+               sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : "");
+       } else if (is_namespace_blk(&ndns->dev)) {
                struct nd_namespace_blk *nsblk;
 
                nsblk = to_nd_namespace_blk(&ndns->dev);
-               sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix);
+               sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id,
+                               suffix ? suffix : "");
        } else {
                return NULL;
        }
@@ -100,6 +133,26 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 }
 EXPORT_SYMBOL(nvdimm_namespace_disk_name);
 
+/*
+ * nd_dev_to_uuid: return the uuid of a pmem or blk namespace device
+ *
+ * A NULL @dev, or a device of any other type, yields a pointer to a
+ * static all-zero 16-byte uuid instead.
+ */
+const u8 *nd_dev_to_uuid(struct device *dev)
+{
+       static const u8 null_uuid[16];
+
+       if (dev && is_namespace_pmem(dev))
+               return to_nd_namespace_pmem(dev)->uuid;
+
+       if (dev && is_namespace_blk(dev))
+               return to_nd_namespace_blk(dev)->uuid;
+
+       return null_uuid;
+}
+EXPORT_SYMBOL(nd_dev_to_uuid);
+
 static ssize_t nstype_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
@@ -1235,12 +1288,22 @@ static const struct attribute_group *nd_namespace_attribute_groups[] = {
 struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 {
        struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
+       struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
        struct nd_namespace_common *ndns;
        resource_size_t size;
 
-       if (nd_btt) {
-               ndns = nd_btt->ndns;
-               if (!ndns)
+       if (nd_btt || nd_pfn) {
+               struct device *host = NULL;
+
+               if (nd_btt) {
+                       host = &nd_btt->dev;
+                       ndns = nd_btt->ndns;
+               } else if (nd_pfn) {
+                       host = &nd_pfn->dev;
+                       ndns = nd_pfn->ndns;
+               }
+
+               if (!ndns || !host)
                        return ERR_PTR(-ENODEV);
 
                /*
@@ -1251,12 +1314,12 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
                device_unlock(&ndns->dev);
                if (ndns->dev.driver) {
                        dev_dbg(&ndns->dev, "is active, can't bind %s\n",
-                                       dev_name(&nd_btt->dev));
+                                       dev_name(host));
                        return ERR_PTR(-EBUSY);
                }
-               if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev,
+               if (dev_WARN_ONCE(&ndns->dev, ndns->claim != host,
                                        "host (%s) vs claim (%s) mismatch\n",
-                                       dev_name(&nd_btt->dev),
+                                       dev_name(host),
                                        dev_name(ndns->claim)))
                        return ERR_PTR(-ENXIO);
        } else {
index e1970c7..159aed5 100644 (file)
@@ -80,4 +80,13 @@ struct resource *nsblk_add_resource(struct nd_region *nd_region,
 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
 void get_ndd(struct nvdimm_drvdata *ndd);
 resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
+void nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns);
+void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns);
+bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+               struct nd_namespace_common **_ndns);
+bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+               struct nd_namespace_common **_ndns);
+ssize_t nd_namespace_store(struct device *dev,
+               struct nd_namespace_common **_ndns, const char *buf,
+               size_t len);
 #endif /* __ND_CORE_H__ */
index c41f53e..417e521 100644 (file)
@@ -29,6 +29,13 @@ enum {
        ND_MAX_LANES = 256,
        SECTOR_SHIFT = 9,
        INT_LBASIZE_ALIGNMENT = 64,
+#if IS_ENABLED(CONFIG_NVDIMM_PFN)
+       ND_PFN_ALIGN = PAGES_PER_SECTION * PAGE_SIZE,
+       ND_PFN_MASK = ND_PFN_ALIGN - 1,
+#else
+       ND_PFN_ALIGN = 0,
+       ND_PFN_MASK = 0,
+#endif
 };
 
 struct nvdimm_drvdata {
@@ -92,8 +99,11 @@ struct nd_region {
        struct device dev;
        struct ida ns_ida;
        struct ida btt_ida;
+       struct ida pfn_ida;
+       unsigned long flags;
        struct device *ns_seed;
        struct device *btt_seed;
+       struct device *pfn_seed;
        u16 ndr_mappings;
        u64 ndr_size;
        u64 ndr_start;
@@ -133,6 +143,22 @@ struct nd_btt {
        int id;
 };
 
+enum nd_pfn_mode {
+       PFN_MODE_NONE,
+       PFN_MODE_RAM,
+       PFN_MODE_PMEM,
+};
+
+struct nd_pfn { /* per-instance state of an "nd_pfn" device */
+       int id; /* index taken from the parent region's pfn_ida */
+       u8 *uuid; /* kmalloc'd 16-byte uuid, or NULL when none is set */
+       struct device dev; /* embedded device; to_nd_pfn() maps back from it */
+       unsigned long npfns; /* presumably the page-frame count; not set in the code visible here -- TODO confirm */
+       enum nd_pfn_mode mode; /* one of PFN_MODE_*, changeable via the sysfs 'mode' attribute */
+       struct nd_pfn_sb *pfn_sb; /* buffer nd_pfn_validate() reads the info block into */
+       struct nd_namespace_common *ndns; /* namespace attached to this device, or NULL */
+};
+
 enum nd_async_mode {
        ND_SYNC,
        ND_ASYNC,
@@ -159,14 +185,19 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
 int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
                void *buf, size_t len);
 struct nd_btt *to_nd_btt(struct device *dev);
-struct btt_sb;
-u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
+
+struct nd_gen_sb {
+       char reserved[SZ_4K - 8];
+       __le64 checksum;
+};
+
+u64 nd_sb_checksum(struct nd_gen_sb *sb);
 #if IS_ENABLED(CONFIG_BTT)
 int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata);
 bool is_nd_btt(struct device *dev);
 struct device *nd_btt_create(struct nd_region *nd_region);
 #else
-static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
+static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
 {
        return -ENODEV;
 }
@@ -180,8 +211,36 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
 {
        return NULL;
 }
+#endif
 
+struct nd_pfn *to_nd_pfn(struct device *dev);
+#if IS_ENABLED(CONFIG_NVDIMM_PFN)
+int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata);
+bool is_nd_pfn(struct device *dev);
+struct device *nd_pfn_create(struct nd_region *nd_region);
+int nd_pfn_validate(struct nd_pfn *nd_pfn);
+#else
+static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
+{
+       return -ENODEV;
+}
+
+static inline bool is_nd_pfn(struct device *dev)
+{
+       return false;
+}
+
+static inline struct device *nd_pfn_create(struct nd_region *nd_region)
+{
+       return NULL;
+}
+
+static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
+{
+       return -ENODEV;
+}
 #endif
+
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
@@ -217,4 +276,6 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
 }
 void nd_iostat_end(struct bio *bio, unsigned long start);
 resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
+const u8 *nd_dev_to_uuid(struct device *dev);
+bool pmem_should_map_pages(struct device *dev);
 #endif /* __ND_H__ */
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
new file mode 100644 (file)
index 0000000..cc24375
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __NVDIMM_PFN_H
+#define __NVDIMM_PFN_H
+
+#include <linux/types.h>
+
+#define PFN_SIG_LEN 16
+#define PFN_SIG "NVDIMM_PFN_INFO\0"
+
+struct nd_pfn_sb {
+       u8 signature[PFN_SIG_LEN];
+       u8 uuid[16];
+       u8 parent_uuid[16];
+       __le32 flags;
+       __le16 version_major;
+       __le16 version_minor;
+       __le64 dataoff;
+       __le64 npfns;
+       __le32 mode;
+       u8 padding[4012];
+       __le64 checksum;
+};
+#endif /* __NVDIMM_PFN_H */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
new file mode 100644 (file)
index 0000000..3fd7d0d
--- /dev/null
@@ -0,0 +1,337 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/blkdev.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-core.h"
+#include "pfn.h"
+#include "nd.h"
+
+/*
+ * Device-type release callback: detach the namespace, give the id back
+ * to the parent region's pfn_ida and free the uuid and the nd_pfn
+ * itself.
+ */
+static void nd_pfn_release(struct device *dev)
+{
+       struct nd_region *region = to_nd_region(dev->parent);
+       struct nd_pfn *pfn = to_nd_pfn(dev);
+
+       dev_dbg(dev, "%s\n", __func__);
+
+       nd_detach_ndns(&pfn->dev, &pfn->ndns);
+       ida_simple_remove(&region->pfn_ida, pfn->id);
+
+       kfree(pfn->uuid);
+       kfree(pfn);
+}
+
+static struct device_type nd_pfn_device_type = {
+       .name = "nd_pfn",
+       .release = nd_pfn_release,
+};
+
+/* True when @dev is non-NULL and has the nd_pfn device type. */
+bool is_nd_pfn(struct device *dev)
+{
+       if (!dev)
+               return false;
+
+       return dev->type == &nd_pfn_device_type;
+}
+EXPORT_SYMBOL(is_nd_pfn);
+
+/*
+ * Map an embedded 'struct device' back to its containing nd_pfn.
+ * Warns, but still returns the computed pointer, when @dev is not a pfn
+ * device.
+ */
+struct nd_pfn *to_nd_pfn(struct device *dev)
+{
+       WARN_ON(!is_nd_pfn(dev));
+
+       return container_of(dev, struct nd_pfn, dev);
+}
+EXPORT_SYMBOL(to_nd_pfn);
+
+/* sysfs 'mode' read: report the pfn mode as "ram", "pmem" or "none". */
+static ssize_t mode_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       enum nd_pfn_mode mode = to_nd_pfn(dev)->mode;
+
+       if (mode == PFN_MODE_RAM)
+               return sprintf(buf, "ram\n");
+       if (mode == PFN_MODE_PMEM)
+               return sprintf(buf, "pmem\n");
+
+       /* PFN_MODE_NONE and any unexpected value */
+       return sprintf(buf, "none\n");
+}
+
+/*
+ * sysfs 'mode' write: set the mode of an idle pfn device.
+ *
+ * Accepts exactly "ram", "none" or "pmem", each with or without one
+ * trailing newline.  "pmem" is recognized but not supported yet.
+ *
+ * Returns @len on success, -EBUSY when a driver is bound to @dev,
+ * -ENOTTY for "pmem" and -EINVAL for any other input.
+ */
+static ssize_t mode_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+       ssize_t rc = 0;
+
+       device_lock(dev);
+       nvdimm_bus_lock(dev);
+       /*
+        * Compare whole strings rather than the first (len - 1) bytes: a
+        * length-limited compare accepts any prefix ("r\n" selected ram)
+        * and, for a one byte write, compares zero bytes and so matched
+        * "pmem".  @buf is treated as NUL-terminated, exactly as the %s
+        * in the dev_dbg() below already does.
+        */
+       if (dev->driver) {
+               rc = -EBUSY;
+       } else if (strcmp(buf, "pmem\n") == 0 || strcmp(buf, "pmem") == 0) {
+               /* TODO: allocate from PMEM support */
+               rc = -ENOTTY;
+       } else if (strcmp(buf, "ram\n") == 0 || strcmp(buf, "ram") == 0) {
+               nd_pfn->mode = PFN_MODE_RAM;
+       } else if (strcmp(buf, "none\n") == 0 || strcmp(buf, "none") == 0) {
+               nd_pfn->mode = PFN_MODE_NONE;
+       } else {
+               rc = -EINVAL;
+       }
+       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       nvdimm_bus_unlock(dev);
+       device_unlock(dev);
+
+       return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(mode);
+
+/* sysfs 'uuid' read: print the uuid, or an empty line when none is set. */
+static ssize_t uuid_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       const u8 *uuid = to_nd_pfn(dev)->uuid;
+
+       if (!uuid)
+               return sprintf(buf, "\n");
+
+       return sprintf(buf, "%pUb\n", uuid);
+}
+
+/*
+ * sysfs 'uuid' write: pass @buf to nd_uuid_store() under the device
+ * lock.  Returns @len when nd_uuid_store() returns 0, otherwise its
+ * return value.
+ */
+static ssize_t uuid_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+       ssize_t ret;
+
+       device_lock(dev);
+       ret = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
+       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+                       ret, buf, buf[len - 1] == '\n' ? "" : "\n");
+       device_unlock(dev);
+
+       if (ret)
+               return ret;
+       return len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+/*
+ * sysfs 'namespace' read: print the name of the attached namespace
+ * device, or an empty line when none is attached.  The bus lock is held
+ * while ->ndns is examined.
+ */
+static ssize_t namespace_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+       const char *name = "";
+       ssize_t count;
+
+       nvdimm_bus_lock(dev);
+       if (nd_pfn->ndns)
+               name = dev_name(&nd_pfn->ndns->dev);
+       count = sprintf(buf, "%s\n", name);
+       nvdimm_bus_unlock(dev);
+
+       return count;
+}
+
+/*
+ * sysfs 'namespace' write: hand @buf to nd_namespace_store() to update
+ * the namespace attached to this pfn device, and return its result.
+ *
+ * Lock order is device lock first, then the nvdimm bus lock -- the same
+ * order mode_store() uses.  Taking them the other way round here would
+ * allow an ABBA deadlock between concurrent writes to the two
+ * attributes of one device.
+ */
+static ssize_t namespace_store(struct device *dev,
+               struct device_attribute *attr, const char *buf, size_t len)
+{
+       struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+       ssize_t rc;
+
+       device_lock(dev);
+       nvdimm_bus_lock(dev);
+       rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
+       dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+                       rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+       nvdimm_bus_unlock(dev);
+       device_unlock(dev);
+
+       return rc;
+}
+static DEVICE_ATTR_RW(namespace);
+
+static struct attribute *nd_pfn_attributes[] = {
+       &dev_attr_mode.attr,
+       &dev_attr_namespace.attr,
+       &dev_attr_uuid.attr,
+       NULL,
+};
+
+static struct attribute_group nd_pfn_attribute_group = {
+       .attrs = nd_pfn_attributes,
+};
+
+static const struct attribute_group *nd_pfn_attribute_groups[] = {
+       &nd_pfn_attribute_group,
+       &nd_device_attribute_group,
+       &nd_numa_attribute_group,
+       NULL,
+};
+
+/*
+ * Allocate and initialize, but do not register, a pfn device under
+ * @nd_region.
+ *
+ * @uuid: optional 16-byte uuid to copy into the device; may be NULL
+ * @mode: initial pfn mode
+ * @ndns: optional namespace to attach to the new device; may be NULL
+ *
+ * Returns the initialized device, or NULL when the region is not pmem,
+ * an allocation fails, or @ndns is already claimed.
+ */
+static struct device *__nd_pfn_create(struct nd_region *nd_region,
+               u8 *uuid, enum nd_pfn_mode mode,
+               struct nd_namespace_common *ndns)
+{
+       struct nd_pfn *nd_pfn;
+       struct device *dev;
+
+       /* we can only create pages for contiguous ranges of pmem */
+       if (!is_nd_pmem(&nd_region->dev))
+               return NULL;
+
+       nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL);
+       if (!nd_pfn)
+               return NULL;
+
+       nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL);
+       if (nd_pfn->id < 0)
+               goto err_free;
+
+       nd_pfn->mode = mode;
+       if (uuid) {
+               /* a failed copy must not silently yield a uuid-less device */
+               uuid = kmemdup(uuid, 16, GFP_KERNEL);
+               if (!uuid)
+                       goto err_id;
+       }
+       nd_pfn->uuid = uuid;
+       dev = &nd_pfn->dev;
+       dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
+       dev->parent = &nd_region->dev;
+       dev->type = &nd_pfn_device_type;
+       dev->groups = nd_pfn_attribute_groups;
+       device_initialize(&nd_pfn->dev);
+       if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
+               dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
+                               __func__, dev_name(ndns->claim));
+               /* device is initialized: the release callback frees it */
+               put_device(dev);
+               return NULL;
+       }
+       return dev;
+
+ err_id:
+       ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
+ err_free:
+       kfree(nd_pfn);
+       return NULL;
+}
+
+/*
+ * Create an empty pfn device (no uuid, PFN_MODE_NONE, no namespace)
+ * under @nd_region and register it.  Returns NULL if creation failed.
+ */
+struct device *nd_pfn_create(struct nd_region *nd_region)
+{
+       struct device *dev;
+
+       dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, NULL);
+       if (!dev)
+               return NULL;
+
+       __nd_device_register(dev);
+       return dev;
+}
+
+/*
+ * nd_pfn_validate: read and check the pfn info block of a namespace
+ *
+ * Reads the info block at offset 4K of the attached namespace into
+ * nd_pfn->pfn_sb and checks signature, checksum, mode, uuid and data
+ * offset.  When the device has no uuid yet, the one from the info block
+ * is copied into it.
+ *
+ * Returns 0 when the info block is usable, otherwise a negative errno.
+ */
+int nd_pfn_validate(struct nd_pfn *nd_pfn)
+{
+       struct nd_namespace_common *ndns = nd_pfn->ndns;
+       struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+       struct nd_namespace_io *nsio;
+       u64 checksum, offset;
+       unsigned int mode;
+
+       if (!pfn_sb || !ndns || !is_nd_pmem(nd_pfn->dev.parent))
+               return -ENODEV;
+
+       /* section alignment for simple hotplug */
+       if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN)
+               return -ENODEV;
+
+       if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
+               return -ENXIO;
+
+       if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN))
+               return -ENODEV;
+
+       checksum = le64_to_cpu(pfn_sb->checksum);
+       pfn_sb->checksum = 0;
+       if (nd_sb_checksum((struct nd_gen_sb *) pfn_sb) != checksum)
+               return -ENODEV;
+       pfn_sb->checksum = cpu_to_le64(checksum);
+
+       mode = le32_to_cpu(pfn_sb->mode);
+       if (mode == PFN_MODE_PMEM)
+               return -ENOTTY; /* TODO: allocate from PMEM support */
+       if (mode != PFN_MODE_RAM)
+               return -ENXIO;
+
+       if (nd_pfn->uuid) {
+               /* from init we validate */
+               if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16))
+                       return -EINVAL;
+       } else {
+               /* from probe we allocate */
+               nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
+               if (!nd_pfn->uuid)
+                       return -ENOMEM;
+       }
+
+       /*
+        * The messages below are verbose because they can only trigger
+        * when the physical address alignment of the namespace has
+        * changed since the pfn superblock was established.
+        */
+       offset = le64_to_cpu(pfn_sb->dataoff);
+       nsio = to_nd_namespace_io(&ndns->dev);
+       if (nsio->res.start & ND_PFN_MASK) {
+               dev_err(&nd_pfn->dev,
+                               "init failed: %s not section aligned\n",
+                               dev_name(&ndns->dev));
+               return -EBUSY;
+       }
+
+       if (offset >= resource_size(&nsio->res)) {
+               dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
+                               dev_name(&ndns->dev));
+               return -EBUSY;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(nd_pfn_validate);
+
+/*
+ * nd_pfn_probe: look for a pfn info block on @ndns
+ *
+ * Creates a pfn device that claims @ndns and validates the on-media
+ * info block through a temporary buffer.  On success the device is
+ * registered; on failure the namespace is detached and the device
+ * dropped.
+ *
+ * Returns the nd_pfn_validate() result, -ENODEV when @ndns is forced to
+ * raw mode, or -ENOMEM when the device could not be created.
+ */
+int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
+{
+       struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+       struct nd_pfn_sb *pfn_sb;
+       struct nd_pfn *nd_pfn;
+       struct device *dev;
+       int rc;
+
+       if (ndns->force_raw)
+               return -ENODEV;
+
+       nvdimm_bus_lock(&ndns->dev);
+       dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, ndns);
+       nvdimm_bus_unlock(&ndns->dev);
+       if (!dev)
+               return -ENOMEM;
+
+       dev_set_drvdata(dev, drvdata);
+
+       /* scratch info block, only needed while validating */
+       pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
+       nd_pfn = to_nd_pfn(dev);
+       nd_pfn->pfn_sb = pfn_sb;
+       rc = nd_pfn_validate(nd_pfn);
+       nd_pfn->pfn_sb = NULL;
+       kfree(pfn_sb);
+
+       dev_dbg(&ndns->dev, "%s: pfn: %s\n", __func__,
+                       rc == 0 ? dev_name(dev) : "<none>");
+       if (rc >= 0) {
+               __nd_device_register(&nd_pfn->dev);
+               return rc;
+       }
+
+       __nd_detach_ndns(dev, &nd_pfn->ndns);
+       put_device(dev);
+       return rc;
+}
+EXPORT_SYMBOL(nd_pfn_probe);
index 4c079d5..b952538 100644 (file)
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/module.h>
+#include <linux/memory_hotplug.h>
 #include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
 #include <linux/nd.h>
+#include "pfn.h"
 #include "nd.h"
 
 struct pmem_device {
        struct request_queue    *pmem_queue;
        struct gendisk          *pmem_disk;
+       struct nd_namespace_common *ndns;
 
        /* One contiguous memory region per device */
        phys_addr_t             phys_addr;
+       /* when non-zero this device is hosting a 'pfn' instance */
+       phys_addr_t             data_offset;
        void __pmem             *virt_addr;
        size_t                  size;
 };
@@ -44,7 +50,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
                        sector_t sector)
 {
        void *mem = kmap_atomic(page);
-       size_t pmem_off = sector << 9;
+       phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
        void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
 
        if (rw == READ) {
@@ -92,19 +98,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 }
 
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
-                             void **kaddr, unsigned long *pfn, long size)
+                     void __pmem **kaddr, unsigned long *pfn)
 {
        struct pmem_device *pmem = bdev->bd_disk->private_data;
-       size_t offset = sector << 9;
-
-       if (!pmem)
-               return -ENODEV;
+       resource_size_t offset = sector * 512 + pmem->data_offset;
+       resource_size_t size;
+
+       if (pmem->data_offset) {
+               /*
+                * Limit the direct_access() size to what is covered by
+                * the memmap
+                */
+               size = (pmem->size - offset) & ~ND_PFN_MASK;
+       } else
+               size = pmem->size - offset;
 
        /* FIXME convert DAX to comprehend that this mapping has a lifetime */
-       *kaddr = (void __force *) pmem->virt_addr + offset;
+       *kaddr = pmem->virt_addr + offset;
        *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
 
-       return pmem->size - offset;
+       return size;
 }
 
 static const struct block_device_operations pmem_fops = {
@@ -119,27 +132,33 @@ static struct pmem_device *pmem_alloc(struct device *dev,
 {
        struct pmem_device *pmem;
 
-       pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
+       pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
        if (!pmem)
                return ERR_PTR(-ENOMEM);
 
        pmem->phys_addr = res->start;
        pmem->size = resource_size(res);
-       if (!arch_has_pmem_api())
+       if (!arch_has_wmb_pmem())
                dev_warn(dev, "unable to guarantee persistence of writes\n");
 
-       if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
+       if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size,
+                       dev_name(dev))) {
                dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
                                &pmem->phys_addr, pmem->size);
-               kfree(pmem);
                return ERR_PTR(-EBUSY);
        }
 
-       pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size);
-       if (!pmem->virt_addr) {
-               release_mem_region(pmem->phys_addr, pmem->size);
-               kfree(pmem);
-               return ERR_PTR(-ENXIO);
+       if (pmem_should_map_pages(dev)) {
+               void *addr = devm_memremap_pages(dev, res);
+
+               if (IS_ERR(addr))
+                       return addr;
+               pmem->virt_addr = (void __pmem *) addr;
+       } else {
+               pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr,
+                               pmem->size);
+               if (!pmem->virt_addr)
+                       return ERR_PTR(-ENXIO);
        }
 
        return pmem;
@@ -147,13 +166,16 @@ static struct pmem_device *pmem_alloc(struct device *dev,
 
 static void pmem_detach_disk(struct pmem_device *pmem)
 {
+       if (!pmem->pmem_disk)
+               return;
+
        del_gendisk(pmem->pmem_disk);
        put_disk(pmem->pmem_disk);
        blk_cleanup_queue(pmem->pmem_queue);
 }
 
-static int pmem_attach_disk(struct nd_namespace_common *ndns,
-               struct pmem_device *pmem)
+static int pmem_attach_disk(struct device *dev,
+               struct nd_namespace_common *ndns, struct pmem_device *pmem)
 {
        struct gendisk *disk;
 
@@ -162,6 +184,7 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns,
                return -ENOMEM;
 
        blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
+       blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
        blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
        blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
        queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
@@ -179,8 +202,8 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns,
        disk->queue             = pmem->pmem_queue;
        disk->flags             = GENHD_FL_EXT_DEVT;
        nvdimm_namespace_disk_name(ndns, disk->disk_name);
-       disk->driverfs_dev = &ndns->dev;
-       set_capacity(disk, pmem->size >> 9);
+       disk->driverfs_dev = dev;
+       set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
        pmem->pmem_disk = disk;
 
        add_disk(disk);
@@ -209,11 +232,152 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
        return 0;
 }
 
-static void pmem_free(struct pmem_device *pmem)
+static int nd_pfn_init(struct nd_pfn *nd_pfn)
+{
+       struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
+       struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
+       struct nd_namespace_common *ndns = nd_pfn->ndns;
+       struct nd_region *nd_region;
+       unsigned long npfns;
+       phys_addr_t offset;
+       u64 checksum;
+       int rc;
+
+       if (!pfn_sb)
+               return -ENOMEM;
+
+       nd_pfn->pfn_sb = pfn_sb;
+       rc = nd_pfn_validate(nd_pfn);
+       if (rc == 0 || rc == -EBUSY)
+               return rc;
+
+       /* section alignment for simple hotplug */
+       if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN
+                       || pmem->phys_addr & ND_PFN_MASK)
+               return -ENODEV;
+
+       nd_region = to_nd_region(nd_pfn->dev.parent);
+       if (nd_region->ro) {
+               dev_info(&nd_pfn->dev,
+                               "%s is read-only, unable to init metadata\n",
+                               dev_name(&nd_region->dev));
+               goto err;
+       }
+
+       memset(pfn_sb, 0, sizeof(*pfn_sb));
+       npfns = (pmem->size - SZ_8K) / SZ_4K;
+       /*
+        * Note, we use 64 here for the standard size of struct page,
+        * debugging options may cause it to be larger in which case the
+        * implementation will limit the pfns advertised through
+        * ->direct_access() to those that are included in the memmap.
+        */
+       if (nd_pfn->mode == PFN_MODE_PMEM)
+               offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE);
+       else if (nd_pfn->mode == PFN_MODE_RAM)
+               offset = SZ_8K;
+       else
+               goto err;
+
+       npfns = (pmem->size - offset) / SZ_4K;
+       pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
+       pfn_sb->dataoff = cpu_to_le64(offset);
+       pfn_sb->npfns = cpu_to_le64(npfns);
+       memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
+       memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
+       pfn_sb->version_major = cpu_to_le16(1);
+       checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
+       pfn_sb->checksum = cpu_to_le64(checksum);
+
+       rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
+       if (rc)
+               goto err;
+
+       return 0;
+ err:
+       nd_pfn->pfn_sb = NULL;
+       kfree(pfn_sb);
+       return -ENXIO;
+}
+
+static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
+{
+       struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
+       struct pmem_device *pmem;
+
+       /* free pmem disk */
+       pmem = dev_get_drvdata(&nd_pfn->dev);
+       pmem_detach_disk(pmem);
+
+       /* release nd_pfn resources */
+       kfree(nd_pfn->pfn_sb);
+       nd_pfn->pfn_sb = NULL;
+
+       return 0;
+}
+
+static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
 {
-       memunmap_pmem(pmem->virt_addr);
-       release_mem_region(pmem->phys_addr, pmem->size);
-       kfree(pmem);
+       struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+       struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
+       struct device *dev = &nd_pfn->dev;
+       struct vmem_altmap *altmap;
+       struct nd_region *nd_region;
+       struct nd_pfn_sb *pfn_sb;
+       struct pmem_device *pmem;
+       phys_addr_t offset;
+       int rc;
+
+       if (!nd_pfn->uuid || !nd_pfn->ndns)
+               return -ENODEV;
+
+       nd_region = to_nd_region(dev->parent);
+       rc = nd_pfn_init(nd_pfn);
+       if (rc)
+               return rc;
+
+       if (PAGE_SIZE != SZ_4K) {
+               dev_err(dev, "only supported on systems with 4K PAGE_SIZE\n");
+               return -ENXIO;
+       }
+       if (nsio->res.start & ND_PFN_MASK) {
+               dev_err(dev, "%s not memory hotplug section aligned\n",
+                               dev_name(&ndns->dev));
+               return -ENXIO;
+       }
+
+       pfn_sb = nd_pfn->pfn_sb;
+       offset = le64_to_cpu(pfn_sb->dataoff);
+       nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
+       if (nd_pfn->mode == PFN_MODE_RAM) {
+               if (offset != SZ_8K)
+                       return -EINVAL;
+               nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
+               altmap = NULL;
+       } else {
+               rc = -ENXIO;
+               goto err;
+       }
+
+       /* establish pfn range for lookup, and switch to direct map */
+       pmem = dev_get_drvdata(dev);
+       memunmap_pmem(dev, pmem->virt_addr);
+       pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res);
+       if (IS_ERR(pmem->virt_addr)) {
+               rc = PTR_ERR(pmem->virt_addr);
+               goto err;
+       }
+
+       /* attach pmem disk in "pfn-mode" */
+       pmem->data_offset = offset;
+       rc = pmem_attach_disk(dev, ndns, pmem);
+       if (rc)
+               goto err;
+
+       return rc;
+ err:
+       nvdimm_namespace_detach_pfn(ndns);
+       return rc;
 }
 
 static int nd_pmem_probe(struct device *dev)
@@ -222,7 +386,6 @@ static int nd_pmem_probe(struct device *dev)
        struct nd_namespace_common *ndns;
        struct nd_namespace_io *nsio;
        struct pmem_device *pmem;
-       int rc;
 
        ndns = nvdimm_namespace_common_probe(dev);
        if (IS_ERR(ndns))
@@ -233,18 +396,27 @@ static int nd_pmem_probe(struct device *dev)
        if (IS_ERR(pmem))
                return PTR_ERR(pmem);
 
+       pmem->ndns = ndns;
        dev_set_drvdata(dev, pmem);
        ndns->rw_bytes = pmem_rw_bytes;
+
        if (is_nd_btt(dev))
-               rc = nvdimm_namespace_attach_btt(ndns);
-       else if (nd_btt_probe(ndns, pmem) == 0) {
+               return nvdimm_namespace_attach_btt(ndns);
+
+       if (is_nd_pfn(dev))
+               return nvdimm_namespace_attach_pfn(ndns);
+
+       if (nd_btt_probe(ndns, pmem) == 0) {
                /* we'll come back as btt-pmem */
-               rc = -ENXIO;
-       } else
-               rc = pmem_attach_disk(ndns, pmem);
-       if (rc)
-               pmem_free(pmem);
-       return rc;
+               return -ENXIO;
+       }
+
+       if (nd_pfn_probe(ndns, pmem) == 0) {
+               /* we'll come back as pfn-pmem */
+               return -ENXIO;
+       }
+
+       return pmem_attach_disk(dev, ndns, pmem);
 }
 
 static int nd_pmem_remove(struct device *dev)
@@ -252,10 +424,11 @@ static int nd_pmem_remove(struct device *dev)
        struct pmem_device *pmem = dev_get_drvdata(dev);
 
        if (is_nd_btt(dev))
-               nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
+               nvdimm_namespace_detach_btt(pmem->ndns);
+       else if (is_nd_pfn(dev))
+               nvdimm_namespace_detach_pfn(pmem->ndns);
        else
                pmem_detach_disk(pmem);
-       pmem_free(pmem);
 
        return 0;
 }
index f28f78c..7da63ea 100644 (file)
@@ -53,6 +53,7 @@ static int nd_region_probe(struct device *dev)
                return -ENODEV;
 
        nd_region->btt_seed = nd_btt_create(nd_region);
+       nd_region->pfn_seed = nd_pfn_create(nd_region);
        if (err == 0)
                return 0;
 
@@ -84,6 +85,7 @@ static int nd_region_remove(struct device *dev)
        nvdimm_bus_lock(dev);
        nd_region->ns_seed = NULL;
        nd_region->btt_seed = NULL;
+       nd_region->pfn_seed = NULL;
        dev_set_drvdata(dev, NULL);
        nvdimm_bus_unlock(dev);
 
index 7384455..529f3f0 100644 (file)
@@ -345,6 +345,23 @@ static ssize_t btt_seed_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(btt_seed);
 
+static ssize_t pfn_seed_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct nd_region *nd_region = to_nd_region(dev);
+       ssize_t rc;
+
+       nvdimm_bus_lock(dev);
+       if (nd_region->pfn_seed)
+               rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed));
+       else
+               rc = sprintf(buf, "\n");
+       nvdimm_bus_unlock(dev);
+
+       return rc;
+}
+static DEVICE_ATTR_RO(pfn_seed);
+
 static ssize_t read_only_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
@@ -373,6 +390,7 @@ static struct attribute *nd_region_attributes[] = {
        &dev_attr_nstype.attr,
        &dev_attr_mappings.attr,
        &dev_attr_btt_seed.attr,
+       &dev_attr_pfn_seed.attr,
        &dev_attr_read_only.attr,
        &dev_attr_set_cookie.attr,
        &dev_attr_available_size.attr,
@@ -740,10 +758,12 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
        nd_region->provider_data = ndr_desc->provider_data;
        nd_region->nd_set = ndr_desc->nd_set;
        nd_region->num_lanes = ndr_desc->num_lanes;
+       nd_region->flags = ndr_desc->flags;
        nd_region->ro = ro;
        nd_region->numa_node = ndr_desc->numa_node;
        ida_init(&nd_region->ns_ida);
        ida_init(&nd_region->btt_ida);
+       ida_init(&nd_region->pfn_ida);
        dev = &nd_region->dev;
        dev_set_name(dev, "region%d", nd_region->id);
        dev->parent = &nvdimm_bus->dev;
index 8177f3b..0b2be17 100644 (file)
@@ -326,8 +326,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
                struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
                dev->rom_base_reg = rom;
                res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
-                               IORESOURCE_READONLY | IORESOURCE_CACHEABLE |
-                               IORESOURCE_SIZEALIGN;
+                               IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
                __pci_read_base(dev, pci_bar_mem32, res, rom);
        }
 }
index 9357aa7..7ad3295 100644 (file)
@@ -97,8 +97,6 @@ static int pnp_assign_mem(struct pnp_dev *dev, struct pnp_mem *rule, int idx)
        /* ??? rule->flags restricted to 8 bits, all tests bogus ??? */
        if (!(rule->flags & IORESOURCE_MEM_WRITEABLE))
                res->flags |= IORESOURCE_READONLY;
-       if (rule->flags & IORESOURCE_MEM_CACHEABLE)
-               res->flags |= IORESOURCE_CACHEABLE;
        if (rule->flags & IORESOURCE_MEM_RANGELENGTH)
                res->flags |= IORESOURCE_RANGELENGTH;
        if (rule->flags & IORESOURCE_MEM_SHADOWABLE)
index 2b744fb..5ed44fe 100644 (file)
@@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static void dcssblk_release(struct gendisk *disk, fmode_t mode);
 static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
 static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
-                                void **kaddr, unsigned long *pfn, long size);
+                        void __pmem **kaddr, unsigned long *pfn);
 
 static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
 
@@ -881,18 +881,20 @@ fail:
 
 static long
 dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
-                       void **kaddr, unsigned long *pfn, long size)
+                       void __pmem **kaddr, unsigned long *pfn)
 {
        struct dcssblk_dev_info *dev_info;
        unsigned long offset, dev_sz;
+       void *addr;
 
        dev_info = bdev->bd_disk->private_data;
        if (!dev_info)
                return -ENODEV;
        dev_sz = dev_info->end - dev_info->start;
        offset = secnum * 512;
-       *kaddr = (void *) (dev_info->start + offset);
-       *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT;
+       addr = (void *) (dev_info->start + offset);
+       *pfn = virt_to_phys(addr) >> PAGE_SHIFT;
+       *kaddr = (void __pmem *) addr;
 
        return dev_sz - offset;
 }
index 31e8576..f6c336b 100644 (file)
@@ -100,12 +100,7 @@ static int asd_map_memio(struct asd_ha_struct *asd_ha)
                                   pci_name(asd_ha->pcidev));
                        goto Err;
                }
-               if (io_handle->flags & IORESOURCE_CACHEABLE)
-                       io_handle->addr = ioremap(io_handle->start,
-                                                 io_handle->len);
-               else
-                       io_handle->addr = ioremap_nocache(io_handle->start,
-                                                         io_handle->len);
+               io_handle->addr = ioremap(io_handle->start, io_handle->len);
                if (!io_handle->addr) {
                        asd_printk("couldn't map MBAR%d of %s\n", i==0?0:1,
                                   pci_name(asd_ha->pcidev));
index 6ac74fb..333db59 100644 (file)
@@ -259,10 +259,7 @@ static bool arcmsr_remap_pciregion(struct AdapterControlBlock *acb)
                addr = (unsigned long)pci_resource_start(pdev, 0);
                range = pci_resource_len(pdev, 0);
                flags = pci_resource_flags(pdev, 0);
-               if (flags & IORESOURCE_CACHEABLE)
-                       mem_base0 = ioremap(addr, range);
-               else
-                       mem_base0 = ioremap_nocache(addr, range);
+               mem_base0 = ioremap(addr, range);
                if (!mem_base0) {
                        pr_notice("arcmsr%d: memory mapping region fail\n",
                                acb->host->host_no);
index f466a6a..e2d555c 100644 (file)
@@ -324,13 +324,9 @@ int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex)
                        goto err_out;
 
                res_flag_ex = pci_resource_flags(pdev, bar_ex);
-               if (res_flag_ex & IORESOURCE_MEM) {
-                       if (res_flag_ex & IORESOURCE_CACHEABLE)
-                               mvi->regs_ex = ioremap(res_start, res_len);
-                       else
-                               mvi->regs_ex = ioremap_nocache(res_start,
-                                               res_len);
-               } else
+               if (res_flag_ex & IORESOURCE_MEM)
+                       mvi->regs_ex = ioremap(res_start, res_len);
+               else
                        mvi->regs_ex = (void *)res_start;
                if (!mvi->regs_ex)
                        goto err_out;
@@ -345,10 +341,7 @@ int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex)
        }
 
        res_flag = pci_resource_flags(pdev, bar);
-       if (res_flag & IORESOURCE_CACHEABLE)
-               mvi->regs = ioremap(res_start, res_len);
-       else
-               mvi->regs = ioremap_nocache(res_start, res_len);
+       mvi->regs = ioremap(res_start, res_len);
 
        if (!mvi->regs) {
                if (mvi->regs_ex && (res_flag_ex & IORESOURCE_MEM))
index e26e81d..d50c5ed 100644 (file)
@@ -12,9 +12,9 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
+#include <linux/io.h>
 
 #include <asm/sun3x.h>
-#include <asm/io.h>
 #include <asm/dma.h>
 #include <asm/dvma.h>
 
index 0768bc4..14ef1f6 100644 (file)
@@ -28,6 +28,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/io.h>
 #include "../comedidev.h"
 
 /*
index 6da7e49..2693c46 100644 (file)
@@ -20,6 +20,7 @@
  */
 
 #include <linux/uuid.h>
+#include <linux/io.h>
 
 #include "version.h"
 #include "visorbus.h"
@@ -35,7 +36,7 @@ static const uuid_le spar_video_guid = SPAR_CONSOLEVIDEO_CHANNEL_PROTOCOL_GUID;
 struct visorchannel {
        u64 physaddr;
        ulong nbytes;
-       void __iomem *mapped;
+       void *mapped;
        bool requested;
        struct channel_header chan_hdr;
        uuid_le guid;
@@ -92,7 +93,7 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes,
                }
        }
 
-       channel->mapped = ioremap_cache(physaddr, size);
+       channel->mapped = memremap(physaddr, size, MEMREMAP_WB);
        if (!channel->mapped) {
                release_mem_region(physaddr, size);
                goto cleanup;
@@ -112,7 +113,7 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes,
        if (uuid_le_cmp(guid, NULL_UUID_LE) == 0)
                guid = channel->chan_hdr.chtype;
 
-       iounmap(channel->mapped);
+       memunmap(channel->mapped);
        if (channel->requested)
                release_mem_region(channel->physaddr, channel->nbytes);
        channel->mapped = NULL;
@@ -125,7 +126,8 @@ visorchannel_create_guts(u64 physaddr, unsigned long channel_bytes,
                }
        }
 
-       channel->mapped = ioremap_cache(channel->physaddr, channel_bytes);
+       channel->mapped = memremap(channel->physaddr, channel_bytes,
+                       MEMREMAP_WB);
        if (!channel->mapped) {
                release_mem_region(channel->physaddr, channel_bytes);
                goto cleanup;
@@ -166,7 +168,7 @@ visorchannel_destroy(struct visorchannel *channel)
        if (!channel)
                return;
        if (channel->mapped) {
-               iounmap(channel->mapped);
+               memunmap(channel->mapped);
                if (channel->requested)
                        release_mem_region(channel->physaddr, channel->nbytes);
        }
@@ -240,7 +242,7 @@ visorchannel_read(struct visorchannel *channel, ulong offset,
        if (offset + nbytes > channel->nbytes)
                return -EIO;
 
-       memcpy_fromio(local, channel->mapped + offset, nbytes);
+       memcpy(local, channel->mapped + offset, nbytes);
 
        return 0;
 }
@@ -262,7 +264,7 @@ visorchannel_write(struct visorchannel *channel, ulong offset,
                       local, copy_size);
        }
 
-       memcpy_toio(channel->mapped + offset, local, nbytes);
+       memcpy(channel->mapped + offset, local, nbytes);
 
        return 0;
 }
index 4b76cb4..94419c3 100644 (file)
@@ -118,7 +118,7 @@ static struct visorchannel *controlvm_channel;
 
 /* Manages the request payload in the controlvm channel */
 struct visor_controlvm_payload_info {
-       u8 __iomem *ptr;        /* pointer to base address of payload pool */
+       u8 *ptr;                /* pointer to base address of payload pool */
        u64 offset;             /* offset from beginning of controlvm
                                 * channel to beginning of payload * pool */
        u32 bytes;              /* number of bytes in payload pool */
@@ -400,21 +400,22 @@ parser_init_byte_stream(u64 addr, u32 bytes, bool local, bool *retry)
                p = __va((unsigned long) (addr));
                memcpy(ctx->data, p, bytes);
        } else {
-               void __iomem *mapping;
+               void *mapping;
 
                if (!request_mem_region(addr, bytes, "visorchipset")) {
                        rc = NULL;
                        goto cleanup;
                }
 
-               mapping = ioremap_cache(addr, bytes);
+               mapping = memremap(addr, bytes, MEMREMAP_WB);
                if (!mapping) {
                        release_mem_region(addr, bytes);
                        rc = NULL;
                        goto cleanup;
                }
-               memcpy_fromio(ctx->data, mapping, bytes);
+               memcpy(ctx->data, mapping, bytes);
                release_mem_region(addr, bytes);
+               memunmap(mapping);
        }
 
        ctx->byte_stream = true;
@@ -1327,7 +1328,7 @@ static int
 initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes,
                                  struct visor_controlvm_payload_info *info)
 {
-       u8 __iomem *payload = NULL;
+       u8 *payload = NULL;
        int rc = CONTROLVM_RESP_SUCCESS;
 
        if (!info) {
@@ -1339,7 +1340,7 @@ initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes,
                rc = -CONTROLVM_RESP_ERROR_PAYLOAD_INVALID;
                goto cleanup;
        }
-       payload = ioremap_cache(phys_addr + offset, bytes);
+       payload = memremap(phys_addr + offset, bytes, MEMREMAP_WB);
        if (!payload) {
                rc = -CONTROLVM_RESP_ERROR_IOREMAP_FAILED;
                goto cleanup;
@@ -1352,7 +1353,7 @@ initialize_controlvm_payload_info(u64 phys_addr, u64 offset, u32 bytes,
 cleanup:
        if (rc < 0) {
                if (payload) {
-                       iounmap(payload);
+                       memunmap(payload);
                        payload = NULL;
                }
        }
@@ -1363,7 +1364,7 @@ static void
 destroy_controlvm_payload_info(struct visor_controlvm_payload_info *info)
 {
        if (info->ptr) {
-               iounmap(info->ptr);
+               memunmap(info->ptr);
                info->ptr = NULL;
        }
        memset(info, 0, sizeof(struct visor_controlvm_payload_info));
index cfbb9d7..271d121 100644 (file)
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/pm_runtime.h>
+#include <linux/io.h>
 #ifdef CONFIG_SPARC
 #include <linux/sunserialcore.h>
 #endif
 
-#include <asm/io.h>
 #include <asm/irq.h>
 
 #include "8250.h"
index de98196..c9293ae 100644 (file)
@@ -325,7 +325,6 @@ static int ocfb_probe(struct platform_device *pdev)
                dev_err(&pdev->dev, "I/O resource request failed\n");
                return -ENXIO;
        }
-       res->flags &= ~IORESOURCE_CACHEABLE;
        fbdev->regs = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(fbdev->regs))
                return PTR_ERR(fbdev->regs);
index 83433cb..96aa46d 100644 (file)
@@ -32,8 +32,7 @@
 #include <linux/spinlock_types.h>
 #include <linux/spinlock.h>
 #include <linux/slab.h>
-
-#include <asm/io.h>
+#include <linux/io.h>
 
 #include <video/s1d13xxxfb.h>
 
index 735355b..7df4228 100644 (file)
@@ -64,6 +64,7 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/io.h>
 
 #include <asm/grfioctl.h>      /* for HP-UX compatibility */
 #include <asm/uaccess.h>
index 33b813e..f77da0e 100644 (file)
@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(bdev_write_page);
  * accessible at this address.
  */
 long bdev_direct_access(struct block_device *bdev, sector_t sector,
-                       void **addr, unsigned long *pfn, long size)
+                       void __pmem **addr, unsigned long *pfn, long size)
 {
        long avail;
        const struct block_device_operations *ops = bdev->bd_disk->fops;
@@ -462,7 +462,7 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector,
        sector += get_start_sect(bdev);
        if (sector % (PAGE_SIZE / 512))
                return -EINVAL;
-       avail = ops->direct_access(bdev, sector, addr, pfn, size);
+       avail = ops->direct_access(bdev, sector, addr, pfn);
        if (!avail)
                return -ERANGE;
        return min(avail, size);
index a7f77e1..57bb70b 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -23,6 +23,7 @@
 #include <linux/memcontrol.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
+#include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
 #include <linux/vmstat.h>
@@ -34,7 +35,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
 
        might_sleep();
        do {
-               void *addr;
+               void __pmem *addr;
                unsigned long pfn;
                long count;
 
@@ -46,10 +47,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
                        unsigned pgsz = PAGE_SIZE - offset_in_page(addr);
                        if (pgsz > count)
                                pgsz = count;
-                       if (pgsz < PAGE_SIZE)
-                               memset(addr, 0, pgsz);
-                       else
-                               clear_page(addr);
+                       clear_pmem(addr, pgsz);
                        addr += pgsz;
                        size -= pgsz;
                        count -= pgsz;
@@ -59,26 +57,29 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
                }
        } while (size);
 
+       wmb_pmem();
        return 0;
 }
 EXPORT_SYMBOL_GPL(dax_clear_blocks);
 
-static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits)
+static long dax_get_addr(struct buffer_head *bh, void __pmem **addr,
+               unsigned blkbits)
 {
        unsigned long pfn;
        sector_t sector = bh->b_blocknr << (blkbits - 9);
        return bdev_direct_access(bh->b_bdev, sector, addr, &pfn, bh->b_size);
 }
 
-static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos,
-                       loff_t end)
+/* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
+static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
+               loff_t pos, loff_t end)
 {
        loff_t final = end - pos + first; /* The final byte of the buffer */
 
        if (first > 0)
-               memset(addr, 0, first);
+               clear_pmem(addr, first);
        if (final < size)
-               memset(addr + final, 0, size - final);
+               clear_pmem(addr + final, size - final);
 }
 
 static bool buffer_written(struct buffer_head *bh)
@@ -106,14 +107,15 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
        loff_t pos = start;
        loff_t max = start;
        loff_t bh_max = start;
-       void *addr;
+       void __pmem *addr;
        bool hole = false;
+       bool need_wmb = false;
 
        if (iov_iter_rw(iter) != WRITE)
                end = min(end, i_size_read(inode));
 
        while (pos < end) {
-               unsigned len;
+               size_t len;
                if (pos == max) {
                        unsigned blkbits = inode->i_blkbits;
                        sector_t block = pos >> blkbits;
@@ -145,19 +147,23 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                                retval = dax_get_addr(bh, &addr, blkbits);
                                if (retval < 0)
                                        break;
-                               if (buffer_unwritten(bh) || buffer_new(bh))
+                               if (buffer_unwritten(bh) || buffer_new(bh)) {
                                        dax_new_buf(addr, retval, first, pos,
                                                                        end);
+                                       need_wmb = true;
+                               }
                                addr += first;
                                size = retval - first;
                        }
                        max = min(pos + size, end);
                }
 
-               if (iov_iter_rw(iter) == WRITE)
-                       len = copy_from_iter_nocache(addr, max - pos, iter);
-               else if (!hole)
-                       len = copy_to_iter(addr, max - pos, iter);
+               if (iov_iter_rw(iter) == WRITE) {
+                       len = copy_from_iter_pmem(addr, max - pos, iter);
+                       need_wmb = true;
+               } else if (!hole)
+                       len = copy_to_iter((void __force *)addr, max - pos,
+                                       iter);
                else
                        len = iov_iter_zero(max - pos, iter);
 
@@ -168,6 +174,9 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
                addr += len;
        }
 
+       if (need_wmb)
+               wmb_pmem();
+
        return (pos == start) ? retval : pos - start;
 }
 
@@ -260,11 +269,13 @@ static int dax_load_hole(struct address_space *mapping, struct page *page,
 static int copy_user_bh(struct page *to, struct buffer_head *bh,
                        unsigned blkbits, unsigned long vaddr)
 {
-       void *vfrom, *vto;
+       void __pmem *vfrom;
+       void *vto;
+
        if (dax_get_addr(bh, &vfrom, blkbits) < 0)
                return -EIO;
        vto = kmap_atomic(to);
-       copy_user_page(vto, vfrom, vaddr, to);
+       copy_user_page(vto, (void __force *)vfrom, vaddr, to);
        kunmap_atomic(vto);
        return 0;
 }
@@ -275,7 +286,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
        struct address_space *mapping = inode->i_mapping;
        sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9);
        unsigned long vaddr = (unsigned long)vmf->virtual_address;
-       void *addr;
+       void __pmem *addr;
        unsigned long pfn;
        pgoff_t size;
        int error;
@@ -303,8 +314,10 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
                goto out;
        }
 
-       if (buffer_unwritten(bh) || buffer_new(bh))
-               clear_page(addr);
+       if (buffer_unwritten(bh) || buffer_new(bh)) {
+               clear_pmem(addr, PAGE_SIZE);
+               wmb_pmem();
+       }
 
        error = vm_insert_mixed(vma, vaddr, pfn);
 
@@ -548,11 +561,12 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
        if (err < 0)
                return err;
        if (buffer_written(&bh)) {
-               void *addr;
+               void __pmem *addr;
                err = dax_get_addr(&bh, &addr, inode->i_blkbits);
                if (err < 0)
                        return err;
-               memset(addr + offset, 0, length);
+               clear_pmem(addr + offset, length);
+               wmb_pmem();
        }
 
        return 0;
index 14909b0..f20f407 100644 (file)
 })
 #endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
 
+/*
+ * Convert a physical address to a Page Frame Number and back
+ */
+#define        __phys_to_pfn(paddr)    ((unsigned long)((paddr) >> PAGE_SHIFT))
+#define        __pfn_to_phys(pfn)      ((pfn) << PAGE_SHIFT)
+
 #define page_to_pfn __page_to_pfn
 #define pfn_to_page __pfn_to_page
 
index a622f27..708923b 100644 (file)
@@ -1569,8 +1569,8 @@ struct block_device_operations {
        int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
        int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
        int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
-       long (*direct_access)(struct block_device *, sector_t,
-                                       void **, unsigned long *pfn, long size);
+       long (*direct_access)(struct block_device *, sector_t, void __pmem **,
+                       unsigned long *pfn);
        unsigned int (*check_events) (struct gendisk *disk,
                                      unsigned int clearing);
        /* ->media_changed() is DEPRECATED, use ->check_events() instead */
@@ -1588,8 +1588,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
                                                struct writeback_control *);
-extern long bdev_direct_access(struct block_device *, sector_t, void **addr,
-                                               unsigned long *pfn, long size);
+extern long bdev_direct_access(struct block_device *, sector_t,
+               void __pmem **addr, unsigned long *pfn, long size);
 #else /* CONFIG_BLOCK */
 
 struct block_device;
index c27dde7..e399029 100644 (file)
@@ -21,7 +21,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/bug.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/page.h>
 
 /*
index fb5a998..de64c1e 100644 (file)
 
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/bug.h>
+#include <linux/err.h>
 #include <asm/io.h>
 #include <asm/page.h>
 
 struct device;
+struct resource;
 
 __visible void __iowrite32_copy(void __iomem *to, const void *from, size_t count);
 void __iowrite64_copy(void __iomem *to, const void *from, size_t count);
@@ -80,6 +83,27 @@ int check_signature(const volatile void __iomem *io_addr,
                        const unsigned char *signature, int length);
 void devm_ioremap_release(struct device *dev, void *res);
 
+void *devm_memremap(struct device *dev, resource_size_t offset,
+               size_t size, unsigned long flags);
+void devm_memunmap(struct device *dev, void *addr);
+
+void *__devm_memremap_pages(struct device *dev, struct resource *res);
+
+#ifdef CONFIG_ZONE_DEVICE
+void *devm_memremap_pages(struct device *dev, struct resource *res);
+#else
+static inline void *devm_memremap_pages(struct device *dev, struct resource *res)
+{
+       /*
+        * Fail attempts to call devm_memremap_pages() without
+        * ZONE_DEVICE support enabled, this requires callers to fall
+        * back to plain devm_memremap() based on config
+        */
+       WARN_ON_ONCE(1);
+       return ERR_PTR(-ENXIO);
+}
+#endif
+
 /*
  * Some systems do not have legacy ISA devices.
  * /dev/port is not a valid interface on these systems.
@@ -121,4 +145,13 @@ static inline int arch_phys_wc_index(int handle)
 #endif
 #endif
 
+enum {
+       /* See memremap() kernel-doc for usage description... */
+       MEMREMAP_WB = 1 << 0,
+       MEMREMAP_WT = 1 << 1,
+};
+
+void *memremap(resource_size_t offset, size_t size, unsigned long flags);
+void memunmap(void *addr);
+
 #endif /* _LINUX_IO_H */
index 75e3af0..3f021dc 100644 (file)
@@ -31,6 +31,9 @@ enum {
        ND_CMD_ARS_STATUS_MAX = SZ_4K,
        ND_MAX_MAPPINGS = 32,
 
+       /* region flag indicating to direct-map persistent memory by default */
+       ND_REGION_PAGEMAP = 0,
+
        /* mark newly adjusted resources as requiring a label update */
        DPA_RESOURCE_ADJUSTED = 1 << 0,
 };
@@ -91,6 +94,7 @@ struct nd_region_desc {
        void *provider_data;
        int num_lanes;
        int numa_node;
+       unsigned long flags;
 };
 
 struct nvdimm_bus;
index 6ffa0ac..8f60e89 100644 (file)
@@ -266,8 +266,9 @@ static inline void remove_memory(int nid, u64 start, u64 size) {}
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
                void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
-extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default);
-extern int arch_add_memory(int nid, u64 start, u64 size);
+extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
+               bool for_device);
+extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);
index 8b257c4..1171a29 100644 (file)
@@ -372,7 +372,14 @@ static inline int put_page_unless_one(struct page *page)
 }
 
 extern int page_is_ram(unsigned long pfn);
-extern int region_is_ram(resource_size_t phys_addr, unsigned long size);
+
+enum {
+       REGION_INTERSECTS,
+       REGION_DISJOINT,
+       REGION_MIXED,
+};
+
+int region_intersects(resource_size_t offset, size_t size, const char *type);
 
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
index ac00e20..d943477 100644 (file)
@@ -319,7 +319,11 @@ enum zone_type {
        ZONE_HIGHMEM,
 #endif
        ZONE_MOVABLE,
+#ifdef CONFIG_ZONE_DEVICE
+       ZONE_DEVICE,
+#endif
        __MAX_NR_ZONES
+
 };
 
 #ifndef __GENERATING_BOUNDS_H
@@ -786,6 +790,25 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
        return !pgdat->node_start_pfn && !pgdat->node_spanned_pages;
 }
 
+/* Position of @zone inside the node_zones[] array of the node that owns it */
+static inline int zone_id(const struct zone *zone)
+{
+       return zone - zone->zone_pgdat->node_zones;
+}
+
+/*
+ * True when @zone is its node's ZONE_DEVICE zone.  ZONE_DEVICE only exists
+ * as an enumerator under CONFIG_ZONE_DEVICE, so without it no zone can match.
+ */
+static inline bool is_dev_zone(const struct zone *zone)
+{
+#ifdef CONFIG_ZONE_DEVICE
+       return zone_id(zone) == ZONE_DEVICE;
+#else
+       return false;
+#endif
+}
+
 #include <linux/memory_hotplug.h>
 
 extern struct mutex zonelists_mutex;
index 29975c7..366cf77 100644 (file)
@@ -27,9 +27,9 @@
 #include <linux/string.h>
 #include <linux/bug.h>
 #include <linux/kernel.h>
+#include <linux/io.h>
 
 #include <asm/unaligned.h>
-#include <asm/io.h>
 #include <asm/barrier.h>
 
 #ifdef CONFIG_MTD_MAP_BANK_WIDTH_1
index d211404..85f810b 100644 (file)
 #define __PMEM_H__
 
 #include <linux/io.h>
+#include <linux/uio.h>
 
 #ifdef CONFIG_ARCH_HAS_PMEM_API
-#include <asm/cacheflush.h>
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
+#include <asm/pmem.h>
 #else
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
+/*
+ * These are simply here to enable compilation, all call sites gate
+ * calling these symbols with arch_has_pmem_api() and redirect to the
+ * implementation in asm/pmem.h.
+ */
+static inline bool __arch_has_wmb_pmem(void)
+{
+       return false;
+}
+
 static inline void arch_wmb_pmem(void)
 {
        BUG();
 }
 
-static inline bool __arch_has_wmb_pmem(void)
+static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
+               size_t n)
 {
-       return false;
+       BUG();
 }
 
-static inline void __pmem *arch_memremap_pmem(resource_size_t offset,
-               unsigned long size)
+static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+               struct iov_iter *i)
 {
-       return NULL;
+       BUG();
+       return 0;
 }
 
-static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
-               size_t n)
+static inline void arch_clear_pmem(void __pmem *addr, size_t size)
 {
        BUG();
 }
@@ -43,18 +57,22 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src,
 
 /*
  * Architectures that define ARCH_HAS_PMEM_API must provide
- * implementations for arch_memremap_pmem(), arch_memcpy_to_pmem(),
- * arch_wmb_pmem(), and __arch_has_wmb_pmem().
+ * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(),
+ * arch_copy_from_iter_pmem(), arch_clear_pmem() and __arch_has_wmb_pmem().
  */
-
 static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size)
 {
        memcpy(dst, (void __force const *) src, size);
 }
 
-static inline void memunmap_pmem(void __pmem *addr)
+static inline void memunmap_pmem(struct device *dev, void __pmem *addr)
+{
+       devm_memunmap(dev, (void __force *) addr);
+}
+
+static inline bool arch_has_pmem_api(void)
 {
-       iounmap((void __force __iomem *) addr);
+       return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
 }
 
 /**
@@ -68,14 +86,7 @@ static inline void memunmap_pmem(void __pmem *addr)
  */
 static inline bool arch_has_wmb_pmem(void)
 {
-       if (IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
-               return __arch_has_wmb_pmem();
-       return false;
-}
-
-static inline bool arch_has_pmem_api(void)
-{
-       return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem();
+       return arch_has_pmem_api() && __arch_has_wmb_pmem();
 }
 
 /*
@@ -85,16 +96,24 @@ static inline bool arch_has_pmem_api(void)
  * default_memremap_pmem + default_memcpy_to_pmem is sufficient for
  * making data durable relative to i/o completion.
  */
-static void default_memcpy_to_pmem(void __pmem *dst, const void *src,
+static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
                size_t size)
 {
        memcpy((void __force *) dst, src, size);
 }
 
-static void __pmem *default_memremap_pmem(resource_size_t offset,
-               unsigned long size)
+static inline size_t default_copy_from_iter_pmem(void __pmem *addr,
+               size_t bytes, struct iov_iter *i)
+{
+       return copy_from_iter_nocache((void __force *)addr, bytes, i);
+}
+
+static inline void default_clear_pmem(void __pmem *addr, size_t size)
 {
-       return (void __pmem __force *)ioremap_wt(offset, size);
+       if (size == PAGE_SIZE && ((unsigned long)addr & ~PAGE_MASK) == 0)
+               clear_page((void __force *)addr);
+       else
+               memset((void __force *)addr, 0, size);
 }
 
 /**
@@ -109,12 +128,11 @@ static void __pmem *default_memremap_pmem(resource_size_t offset,
  * wmb_pmem() arrange for the data to be written through the
  * cache to persistent media.
  */
-static inline void __pmem *memremap_pmem(resource_size_t offset,
-               unsigned long size)
+static inline void __pmem *memremap_pmem(struct device *dev,
+               resource_size_t offset, unsigned long size)
 {
-       if (arch_has_pmem_api())
-               return arch_memremap_pmem(offset, size);
-       return default_memremap_pmem(offset, size);
+       return (void __pmem *) devm_memremap(dev, offset, size,
+                       ARCH_MEMREMAP_PMEM);
 }
 
 /**
@@ -146,7 +164,42 @@ static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n)
  */
 static inline void wmb_pmem(void)
 {
-       if (arch_has_pmem_api())
+       if (arch_has_wmb_pmem())
                arch_wmb_pmem();
+       else
+               wmb();
+}
+
+/**
+ * copy_from_iter_pmem - fill a PMEM buffer from an iov_iter
+ * @addr:      PMEM destination address
+ * @bytes:     number of bytes to copy
+ * @i:         iterator supplying the source data
+ *
+ * Dispatches to the architecture implementation when the arch provides
+ * the PMEM api, otherwise to default_copy_from_iter_pmem().  Returns
+ * whatever the selected implementation returns.  The caller still has to
+ * order the copy with an explicit wmb_pmem().
+ */
+static inline size_t copy_from_iter_pmem(void __pmem *addr, size_t bytes,
+               struct iov_iter *i)
+{
+       if (!arch_has_pmem_api())
+               return default_copy_from_iter_pmem(addr, bytes, i);
+       return arch_copy_from_iter_pmem(addr, bytes, i);
+}
+
+/**
+ * clear_pmem - zero a PMEM memory range
+ * @addr:      virtual start address
+ * @size:      number of bytes to zero
+ *
+ * Write zeros into the memory range starting at 'addr' for 'size' bytes.
+ * This function requires explicit ordering with a wmb_pmem() call.
+ */
+static inline void clear_pmem(void __pmem *addr, size_t size)
+{
+       if (arch_has_pmem_api())
+               arch_clear_pmem(addr, size);
+       else
+               default_clear_pmem(addr, size);
 }
 #endif /* __PMEM_H__ */
index 2b94ea2..5b4a4be 100644 (file)
@@ -87,7 +87,7 @@ struct nd_cmd_ars_status {
                __u32 handle;
                __u32 flags;
                __u64 err_address;
-               __u64 mask;
+               __u64 length;
        } __packed records[0];
 } __packed;
 
@@ -111,6 +111,11 @@ enum {
        ND_CMD_VENDOR = 9,
 };
 
+enum {
+       ND_ARS_VOLATILE = 1,
+       ND_ARS_PERSISTENT = 2,
+};
+
 static inline const char *nvdimm_bus_cmd_name(unsigned cmd)
 {
        static const char * const names[] = {
@@ -194,4 +199,9 @@ enum nd_driver_flags {
 enum {
        ND_MIN_NAMESPACE_SIZE = 0x00400000,
 };
+
+enum ars_masks {
+       ARS_STATUS_MASK = 0x0000FFFF,
+       ARS_EXT_STATUS_SHIFT = 16,
+};
 #endif /* __NDCTL_H__ */
index cac567f..d334e64 100644 (file)
@@ -18,7 +18,7 @@
 #define __linux_video_vga_h__
 
 #include <linux/types.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/vga.h>
 #include <asm/byteorder.h>
 
index d25ebea..e0d7587 100644 (file)
@@ -99,6 +99,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
 
+obj-$(CONFIG_HAS_IOMEM) += memremap.o
+
 $(obj)/configs.o: $(obj)/config_data.h
 
 # config_data.h contains the same information as ikconfig.h but gzipped.
diff --git a/kernel/memremap.c b/kernel/memremap.c
new file mode 100644 (file)
index 0000000..72b0c66
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/memory_hotplug.h>
+
+#ifndef ioremap_cache
+/* temporary while we convert existing ioremap_cache users to memremap */
+__weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)
+{
+       return ioremap(offset, size);
+}
+#endif
+
+/*
+ * try_ram_remap() - resolve a "System RAM" offset through the direct map
+ *
+ * Only lowmem has a permanent linear-map address.  For a highmem page
+ * __va() would produce a pointer that is not backed by any mapping, so
+ * return NULL and let memremap() try to establish a real mapping instead.
+ * Only the first page of the range is examined.
+ */
+static void *try_ram_remap(resource_size_t offset)
+{
+       struct page *page = pfn_to_page(offset >> PAGE_SHIFT);
+
+       if (PageHighMem(page))
+               return NULL;
+       return __va(offset);
+}
+
+/**
+ * memremap() - remap an iomem_resource as cacheable memory
+ * @offset: iomem resource start address
+ * @size: size of remap
+ * @flags: either MEMREMAP_WB or MEMREMAP_WT
+ *
+ * memremap() is "ioremap" for cases where it is known that the resource
+ * being mapped does not have i/o side effects and the __iomem
+ * annotation is not applicable.
+ *
+ * MEMREMAP_WB - matches the default mapping for "System RAM" on
+ * the architecture.  This is usually a read-allocate write-back cache.
+ * Moreover, if MEMREMAP_WB is specified and the requested remap region is
+ * lowmem RAM, memremap() will bypass establishing a new mapping and
+ * instead return a pointer into the direct map.  Highmem RAM has no
+ * direct-map address, so for it ioremap_cache() is attempted instead.
+ *
+ * MEMREMAP_WT - establish a mapping whereby writes either bypass the
+ * cache or are written through to memory and never exist in a
+ * cache-dirty state with respect to program visibility.  Attempts to
+ * map "System RAM" with this mapping type will fail.
+ *
+ * Returns the mapped address, or NULL if the range mixes "System RAM"
+ * with other resources or no requested mapping type could be established.
+ */
+void *memremap(resource_size_t offset, size_t size, unsigned long flags)
+{
+       int is_ram = region_intersects(offset, size, "System RAM");
+       void *addr = NULL;
+
+       if (is_ram == REGION_MIXED) {
+               WARN_ONCE(1, "memremap attempted on mixed range %pa size: %#lx\n",
+                               &offset, (unsigned long) size);
+               return NULL;
+       }
+
+       /* Try all mapping types requested until one returns non-NULL */
+       if (flags & MEMREMAP_WB) {
+               flags &= ~MEMREMAP_WB;
+               /*
+                * MEMREMAP_WB is special in that it can be satisfied
+                * from the direct map.  Some archs depend on the
+                * capability of memremap() to autodetect cases where
+                * the requested range is potentially in "System RAM"
+                */
+               if (is_ram == REGION_INTERSECTS)
+                       addr = try_ram_remap(offset);
+               /* not RAM, or RAM without a linear-map address */
+               if (!addr)
+                       addr = ioremap_cache(offset, size);
+       }
+
+       /*
+        * If we don't have a mapping yet and more request flags are
+        * pending then we will be attempting to establish a new virtual
+        * address mapping.  Enforce that this mapping is not aliasing
+        * "System RAM"
+        */
+       if (!addr && is_ram == REGION_INTERSECTS && flags) {
+               WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
+                               &offset, (unsigned long) size);
+               return NULL;
+       }
+
+       if (!addr && (flags & MEMREMAP_WT)) {
+               flags &= ~MEMREMAP_WT;
+               addr = ioremap_wt(offset, size);
+       }
+
+       return addr;
+}
+EXPORT_SYMBOL(memremap);
+
+/*
+ * memunmap() - release an address obtained from memremap()
+ *
+ * Only addresses in the vmalloc range are handed to iounmap(); any other
+ * pointer (e.g. the __va() address memremap() returns for "System RAM")
+ * is left alone.
+ */
+void memunmap(void *addr)
+{
+       if (!is_vmalloc_addr(addr))
+               return;
+
+       iounmap((void __iomem *) addr);
+}
+EXPORT_SYMBOL(memunmap);
+
+/*
+ * devres release callback for devm_memremap().  @res is the devres
+ * payload, i.e. the slot in which devm_memremap() stored the mapped
+ * address, not the mapping itself; dereference it to get the pointer
+ * that has to be unmapped.
+ */
+static void devm_memremap_release(struct device *dev, void *res)
+{
+       memunmap(*(void **)res);
+}
+
+/*
+ * devres match callback: @res is the slot holding a mapped address,
+ * @match_data the address being searched for.  Non-zero on a match.
+ */
+static int devm_memremap_match(struct device *dev, void *res, void *match_data)
+{
+       void **slot = res;
+
+       return *slot == match_data;
+}
+
+/*
+ * devm_memremap() - memremap() tracked as a device resource
+ * @dev: device the mapping is registered against
+ * @offset: start address handed to memremap()
+ * @size: size handed to memremap()
+ * @flags: MEMREMAP_* flags handed to memremap()
+ *
+ * On success a devres entry (release callback devm_memremap_release())
+ * holding the mapped address is added to @dev.  Returns the mapped
+ * address, or NULL if either the devres allocation or memremap() fails.
+ */
+void *devm_memremap(struct device *dev, resource_size_t offset,
+               size_t size, unsigned long flags)
+{
+       void **slot;
+       void *vaddr;
+
+       slot = devres_alloc(devm_memremap_release, sizeof(*slot), GFP_KERNEL);
+       if (!slot)
+               return NULL;
+
+       vaddr = memremap(offset, size, flags);
+       if (!vaddr) {
+               /* nothing was mapped, drop the unused tracking entry */
+               devres_free(slot);
+               return NULL;
+       }
+
+       *slot = vaddr;
+       devres_add(dev, slot);
+       return vaddr;
+}
+EXPORT_SYMBOL(devm_memremap);
+
+/*
+ * devm_memunmap() - explicitly undo a devm_memremap()
+ * @dev: device the mapping was registered against
+ * @addr: address returned by devm_memremap()
+ *
+ * Removes the devres entry tracking @addr (warning if devres_destroy()
+ * reports an error, e.g. no such entry) and then unmaps @addr directly.
+ */
+void devm_memunmap(struct device *dev, void *addr)
+{
+       int rc = devres_destroy(dev, devm_memremap_release,
+                       devm_memremap_match, addr);
+
+       WARN_ON(rc);
+       memunmap(addr);
+}
+EXPORT_SYMBOL(devm_memunmap);
+
+#ifdef CONFIG_ZONE_DEVICE
+struct page_map {
+       struct resource res;
+};
+
+/*
+ * devres release callback for devm_memremap_pages(): @res is the
+ * struct page_map recorded when the range was added.
+ */
+static void devm_memremap_pages_release(struct device *dev, void *res)
+{
+       struct page_map *page_map = res;
+       struct resource *range = &page_map->res;
+
+       /* pages are dead and unused, undo the arch mapping */
+       arch_remove_memory(range->start, resource_size(range));
+}
+
+/*
+ * devm_memremap_pages() - add a device memory range via arch_add_memory()
+ * @dev: device that owns the range; its NUMA node is used (node 0 if the
+ *       device reports none)
+ * @res: physical range to add
+ *
+ * A range that is entirely "System RAM" is returned as its __va() address
+ * without adding anything.  Otherwise the range is added with the
+ * for_device flag set and a devres entry is registered so that
+ * devm_memremap_pages_release() can remove it again.
+ *
+ * Returns __va(res->start) on success, or an ERR_PTR(): -ENXIO when the
+ * range mixes "System RAM" with other resources, -ENOMEM when the devres
+ * allocation fails, or the error reported by arch_add_memory().
+ */
+void *devm_memremap_pages(struct device *dev, struct resource *res)
+{
+       resource_size_t len = resource_size(res);
+       struct page_map *page_map;
+       int nid, rc;
+
+       switch (region_intersects(res->start, len, "System RAM")) {
+       case REGION_MIXED:
+               WARN_ONCE(1, "%s attempted on mixed region %pr\n",
+                               __func__, res);
+               return ERR_PTR(-ENXIO);
+       case REGION_INTERSECTS:
+               /* plain RAM: hand back its linear address, add nothing */
+               return __va(res->start);
+       default:
+               break;
+       }
+
+       page_map = devres_alloc(devm_memremap_pages_release,
+                       sizeof(*page_map), GFP_KERNEL);
+       if (!page_map)
+               return ERR_PTR(-ENOMEM);
+
+       /* private copy of the range for the release callback */
+       page_map->res = *res;
+
+       nid = dev_to_node(dev);
+       if (nid < 0)
+               nid = 0;
+
+       rc = arch_add_memory(nid, res->start, len, true);
+       if (rc) {
+               devres_free(page_map);
+               return ERR_PTR(rc);
+       }
+
+       devres_add(dev, page_map);
+       return __va(res->start);
+}
+EXPORT_SYMBOL(devm_memremap_pages);
+#endif /* CONFIG_ZONE_DEVICE */
index fed052a..f150dbb 100644 (file)
@@ -492,40 +492,51 @@ int __weak page_is_ram(unsigned long pfn)
 }
 EXPORT_SYMBOL_GPL(page_is_ram);
 
-/*
- * Search for a resouce entry that fully contains the specified region.
- * If found, return 1 if it is RAM, 0 if not.
- * If not found, or region is not fully contained, return -1
+/**
+ * region_intersects() - determine intersection of region with known resources
+ * @start: region start address
+ * @size: size of region
+ * @name: name of resource (in iomem_resource)
  *
- * Used by the ioremap functions to ensure the user is not remapping RAM and is
- * a vast speed up over walking through the resource table page by page.
+ * Check if the specified region partially overlaps or fully eclipses a
+ * resource identified by @name.  Return REGION_DISJOINT if the region
+ * does not overlap @name, return REGION_MIXED if the region overlaps
+ * @name and another resource, and return REGION_INTERSECTS if the
+ * region overlaps @name and no other defined resource. Note that
+ * REGION_INTERSECTS is also returned in the case when the specified
+ * region overlaps RAM and undefined memory holes.
+ *
+ * region_intersects() is used by memory remapping functions to ensure
+ * the user is not remapping RAM and is a vast speed up over walking
+ * through the resource table page by page.
  */
-int region_is_ram(resource_size_t start, unsigned long size)
+int region_intersects(resource_size_t start, size_t size, const char *name)
 {
-       struct resource *p;
-       resource_size_t end = start + size - 1;
        unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-       const char *name = "System RAM";
-       int ret = -1;
+       resource_size_t end = start + size - 1;
+       int type = 0; int other = 0;
+       struct resource *p;
 
        read_lock(&resource_lock);
        for (p = iomem_resource.child; p ; p = p->sibling) {
-               if (p->end < start)
-                       continue;
-
-               if (p->start <= start && end <= p->end) {
-                       /* resource fully contains region */
-                       if ((p->flags != flags) || strcmp(p->name, name))
-                               ret = 0;
-                       else
-                               ret = 1;
-                       break;
-               }
-               if (end < p->start)
-                       break;  /* not found */
+               bool is_type = strcmp(p->name, name) == 0 && p->flags == flags;
+
+               if (start >= p->start && start <= p->end)
+                       is_type ? type++ : other++;
+               if (end >= p->start && end <= p->end)
+                       is_type ? type++ : other++;
+               if (p->start >= start && p->end <= end)
+                       is_type ? type++ : other++;
        }
        read_unlock(&resource_lock);
-       return ret;
+
+       if (other == 0)
+               return type ? REGION_INTERSECTS : REGION_DISJOINT;
+
+       if (type)
+               return REGION_MIXED;
+
+       return REGION_DISJOINT;
 }
 
 void __weak arch_remove_reservations(struct resource *avail)
index 8a49ff9..2e491ac 100644 (file)
@@ -525,4 +525,7 @@ config ARCH_HAS_SG_CHAIN
 config ARCH_HAS_PMEM_API
        bool
 
+config ARCH_HAS_MMIO_FLUSH
+       bool
+
 endmenu
index fbe2aac..f13a246 100644 (file)
@@ -119,10 +119,9 @@ EXPORT_SYMBOL(devm_iounmap);
  * @dev: generic device to handle the resource for
  * @res: resource to be handled
  *
- * Checks that a resource is a valid memory region, requests the memory region
- * and ioremaps it either as cacheable or as non-cacheable memory depending on
- * the resource's flags. All operations are managed and will be undone on
- * driver detach.
+ * Checks that a resource is a valid memory region, requests the memory
+ * region and ioremaps it. All operations are managed and will be undone
+ * on driver detach.
  *
  * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code
  * on failure. Usage example:
@@ -153,11 +152,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res)
                return IOMEM_ERR_PTR(-EBUSY);
        }
 
-       if (res->flags & IORESOURCE_CACHEABLE)
-               dest_ptr = devm_ioremap(dev, res->start, size);
-       else
-               dest_ptr = devm_ioremap_nocache(dev, res->start, size);
-
+       dest_ptr = devm_ioremap(dev, res->start, size);
        if (!dest_ptr) {
                dev_err(dev, "ioremap failed for resource %pR\n", res);
                devm_release_mem_region(dev, res->start, size);
index 5f5d24d..c10fba4 100644 (file)
@@ -41,11 +41,8 @@ void __iomem *pci_iomap_range(struct pci_dev *dev,
                len = maxlen;
        if (flags & IORESOURCE_IO)
                return __pci_ioport_map(dev, start, len);
-       if (flags & IORESOURCE_MEM) {
-               if (flags & IORESOURCE_CACHEABLE)
-                       return ioremap(start, len);
-               return ioremap_nocache(start, len);
-       }
+       if (flags & IORESOURCE_MEM)
+               return ioremap(start, len);
        /* What? */
        return NULL;
 }
index d4e6495..3a4070f 100644 (file)
@@ -648,3 +648,20 @@ config DEFERRED_STRUCT_PAGE_INIT
          when kswapd starts. This has a potential performance impact on
          processes running early in the lifetime of the systemm until kswapd
          finishes the initialisation.
+
+config ZONE_DEVICE
+       bool "Device memory (pmem, etc...) hotplug support" if EXPERT
+       default !ZONE_DMA
+       depends on !ZONE_DMA
+       depends on MEMORY_HOTPLUG
+       depends on MEMORY_HOTREMOVE
+       depends on X86_64 #arch_add_memory() comprehends device memory
+
+       help
+         Device memory hotplug support allows for establishing pmem,
+         or other device driver discovered memory regions, in the
+         memmap. This allows pfn_to_page() lookups of otherwise
+         "device-physical" addresses which is needed for using a DAX
+         mapping in an O_DIRECT operation, among other things.
+
+         If FS_DAX is enabled, then say Y.
index 8fd97da..aa992e2 100644 (file)
@@ -778,7 +778,10 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 
        start = phys_start_pfn << PAGE_SHIFT;
        size = nr_pages * PAGE_SIZE;
-       ret = release_mem_region_adjustable(&iomem_resource, start, size);
+
+       /* in the ZONE_DEVICE case device driver owns the memory region */
+       if (!is_dev_zone(zone))
+               ret = release_mem_region_adjustable(&iomem_resource, start, size);
        if (ret) {
                resource_size_t endres = start + size - 1;
 
@@ -1215,8 +1218,13 @@ static int should_add_memory_movable(int nid, u64 start, u64 size)
        return 0;
 }
 
-int zone_for_memory(int nid, u64 start, u64 size, int zone_default)
+int zone_for_memory(int nid, u64 start, u64 size, int zone_default,
+               bool for_device)
 {
+#ifdef CONFIG_ZONE_DEVICE
+       if (for_device)
+               return ZONE_DEVICE;
+#endif
        if (should_add_memory_movable(nid, start, size))
                return ZONE_MOVABLE;
 
@@ -1265,7 +1273,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
        }
 
        /* call arch's memory hotadd */
-       ret = arch_add_memory(nid, start, size);
+       ret = arch_add_memory(nid, start, size, false);
 
        if (ret < 0)
                goto error;
index 5b5240b..b401d40 100644 (file)
@@ -206,6 +206,9 @@ static char * const zone_names[MAX_NR_ZONES] = {
         "HighMem",
 #endif
         "Movable",
+#ifdef CONFIG_ZONE_DEVICE
+        "Device",
+#endif
 };
 
 int min_free_kbytes = 1024;
index f56914c..38b00ec 100644 (file)
@@ -1,9 +1,12 @@
-ldflags-y += --wrap=ioremap_wt
 ldflags-y += --wrap=ioremap_wc
+ldflags-y += --wrap=memremap
 ldflags-y += --wrap=devm_ioremap_nocache
-ldflags-y += --wrap=ioremap_cache
+ldflags-y += --wrap=devm_memremap
+ldflags-y += --wrap=devm_memunmap
 ldflags-y += --wrap=ioremap_nocache
 ldflags-y += --wrap=iounmap
+ldflags-y += --wrap=memunmap
+ldflags-y += --wrap=__devm_request_region
 ldflags-y += --wrap=__request_region
 ldflags-y += --wrap=__release_region
 
@@ -15,6 +18,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
 obj-$(CONFIG_ACPI_NFIT) += nfit.o
 
 nfit-y := $(ACPI_SRC)/nfit.o
@@ -29,6 +33,9 @@ nd_btt-y += config_check.o
 nd_blk-y := $(NVDIMM_SRC)/blk.o
 nd_blk-y += config_check.o
 
+nd_e820-y := $(NVDIMM_SRC)/e820.o
+nd_e820-y += config_check.o
+
 libnvdimm-y := $(NVDIMM_SRC)/core.o
 libnvdimm-y += $(NVDIMM_SRC)/bus.o
 libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
@@ -37,7 +44,9 @@ libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
 libnvdimm-y += $(NVDIMM_SRC)/region.o
 libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
 libnvdimm-y += $(NVDIMM_SRC)/label.o
+libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
 libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
+libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
 libnvdimm-y += config_check.o
 
 obj-m += test/
index 64bfaa5..b725131 100644 (file)
@@ -80,23 +80,52 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
 }
 EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
 
-void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size)
+void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
+               size_t size, unsigned long flags)
 {
-       return __nfit_test_ioremap(offset, size, ioremap_cache);
+       struct nfit_test_resource *nfit_res;
+
+       rcu_read_lock();
+       nfit_res = get_nfit_res(offset);
+       rcu_read_unlock();
+       if (nfit_res)
+               return nfit_res->buf + offset - nfit_res->res->start;
+       return devm_memremap(dev, offset, size, flags);
 }
-EXPORT_SYMBOL(__wrap_ioremap_cache);
+EXPORT_SYMBOL(__wrap_devm_memremap);
 
-void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
+void *__wrap_memremap(resource_size_t offset, size_t size,
+               unsigned long flags)
 {
-       return __nfit_test_ioremap(offset, size, ioremap_nocache);
+       struct nfit_test_resource *nfit_res;
+
+       rcu_read_lock();
+       nfit_res = get_nfit_res(offset);
+       rcu_read_unlock();
+       if (nfit_res)
+               return nfit_res->buf + offset - nfit_res->res->start;
+       return memremap(offset, size, flags);
 }
-EXPORT_SYMBOL(__wrap_ioremap_nocache);
+EXPORT_SYMBOL(__wrap_memremap);
+
+/*
+ * Link-time wrapper for devm_memunmap().  When @addr resolves to an
+ * nfit_test resource there is nothing to undo (the matching memremap
+ * wrapper returns a pointer into the resource's buffer instead of
+ * creating a mapping); every other address goes to the real
+ * devm_memunmap().
+ */
+void __wrap_devm_memunmap(struct device *dev, void *addr)
+{
+       struct nfit_test_resource *nfit_res;
+
+       rcu_read_lock();
+       nfit_res = get_nfit_res((unsigned long) addr);
+       rcu_read_unlock();
+
+       /* plain call: a void function must not "return" an expression */
+       if (!nfit_res)
+               devm_memunmap(dev, addr);
+}
+EXPORT_SYMBOL(__wrap_devm_memunmap);
 
-void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size)
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
 {
-       return __nfit_test_ioremap(offset, size, ioremap_wt);
+       return __nfit_test_ioremap(offset, size, ioremap_nocache);
 }
-EXPORT_SYMBOL(__wrap_ioremap_wt);
+EXPORT_SYMBOL(__wrap_ioremap_nocache);
 
 void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size)
 {
@@ -117,9 +146,22 @@ void __wrap_iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(__wrap_iounmap);
 
-struct resource *__wrap___request_region(struct resource *parent,
-               resource_size_t start, resource_size_t n, const char *name,
-               int flags)
+/*
+ * Link-time wrapper for memunmap().  When @addr resolves to an nfit_test
+ * resource there is nothing to undo (the matching memremap wrapper
+ * returns a pointer into the resource's buffer instead of creating a
+ * mapping); every other address goes to the real memunmap().
+ */
+void __wrap_memunmap(void *addr)
+{
+       struct nfit_test_resource *nfit_res;
+
+       rcu_read_lock();
+       nfit_res = get_nfit_res((unsigned long) addr);
+       rcu_read_unlock();
+
+       /* plain call: a void function must not "return" an expression */
+       if (!nfit_res)
+               memunmap(addr);
+}
+EXPORT_SYMBOL(__wrap_memunmap);
+
+static struct resource *nfit_test_request_region(struct device *dev,
+               struct resource *parent, resource_size_t start,
+               resource_size_t n, const char *name, int flags)
 {
        struct nfit_test_resource *nfit_res;
 
@@ -147,10 +189,29 @@ struct resource *__wrap___request_region(struct resource *parent,
                        return res;
                }
        }
+       if (dev)
+               return __devm_request_region(dev, parent, start, n, name);
        return __request_region(parent, start, n, name, flags);
 }
+
+/*
+ * Link-time wrapper for __request_region().  Passes a NULL device so that
+ * nfit_test_request_region() falls back to __request_region() rather than
+ * __devm_request_region() when it does not handle the request itself.
+ */
+struct resource *__wrap___request_region(struct resource *parent,
+               resource_size_t start, resource_size_t n, const char *name,
+               int flags)
+{
+       return nfit_test_request_region(NULL, parent, start, n, name, flags);
+}
 EXPORT_SYMBOL(__wrap___request_region);
 
+/*
+ * Link-time wrapper for __devm_request_region().  A NULL @dev is rejected
+ * here because nfit_test_request_region() treats a NULL device as the
+ * unmanaged __request_region() path.
+ */
+struct resource *__wrap___devm_request_region(struct device *dev,
+               struct resource *parent, resource_size_t start,
+               resource_size_t n, const char *name)
+{
+       return dev ? nfit_test_request_region(dev, parent, start, n, name, 0)
+                  : NULL;
+}
+EXPORT_SYMBOL(__wrap___devm_request_region);
+
 void __wrap___release_region(struct resource *parent, resource_size_t start,
                                resource_size_t n)
 {
index d0bdae4..021e6f9 100644 (file)
@@ -147,75 +147,153 @@ static struct nfit_test *to_nfit_test(struct device *dev)
        return container_of(pdev, struct nfit_test, pdev);
 }
 
+static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
+               unsigned int buf_len)
+{
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       nd_cmd->status = 0;
+       nd_cmd->config_size = LABEL_SIZE;
+       nd_cmd->max_xfer = SZ_4K;
+
+       return 0;
+}
+
+static int nfit_test_cmd_get_config_data(struct nd_cmd_get_config_data_hdr
+               *nd_cmd, unsigned int buf_len, void *label)
+{
+       unsigned int len, offset = nd_cmd->in_offset;
+       int rc;
+
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+       if (offset >= LABEL_SIZE)
+               return -EINVAL;
+       if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
+               return -EINVAL;
+
+       nd_cmd->status = 0;
+       len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+       memcpy(nd_cmd->out_buf, label + offset, len);
+       rc = buf_len - sizeof(*nd_cmd) - len;
+
+       return rc;
+}
+
+static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
+               unsigned int buf_len, void *label)
+{
+       unsigned int len, offset = nd_cmd->in_offset;
+       u32 *status;
+       int rc;
+
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+       if (offset >= LABEL_SIZE)
+               return -EINVAL;
+       if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
+               return -EINVAL;
+
+       status = (void *)nd_cmd + nd_cmd->in_length + sizeof(*nd_cmd);
+       *status = 0;
+       len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+       memcpy(label + offset, nd_cmd->in_buf, len);
+       rc = buf_len - sizeof(*nd_cmd) - (len + 4);
+
+       return rc;
+}
+
+static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
+               unsigned int buf_len)
+{
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       nd_cmd->max_ars_out = 256;
+       nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
+
+       return 0;
+}
+
+static int nfit_test_cmd_ars_start(struct nd_cmd_ars_start *nd_cmd,
+               unsigned int buf_len)
+{
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       nd_cmd->status = 0;
+
+       return 0;
+}
+
+static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd,
+               unsigned int buf_len)
+{
+       if (buf_len < sizeof(*nd_cmd))
+               return -EINVAL;
+
+       nd_cmd->out_length = 256;
+       nd_cmd->num_records = 0;
+       nd_cmd->status = 0;
+
+       return 0;
+}
+
 static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                struct nvdimm *nvdimm, unsigned int cmd, void *buf,
                unsigned int buf_len)
 {
        struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
        struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
-       struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
-       int i, rc;
+       int i, rc = 0;
 
-       if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask))
-               return -ENOTTY;
+       if (nvdimm) {
+               struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
 
-       /* lookup label space for the given dimm */
-       for (i = 0; i < ARRAY_SIZE(handle); i++)
-               if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+               if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask))
+                       return -ENOTTY;
+
+               /* lookup label space for the given dimm */
+               for (i = 0; i < ARRAY_SIZE(handle); i++)
+                       if (__to_nfit_memdev(nfit_mem)->device_handle ==
+                                       handle[i])
+                               break;
+               if (i >= ARRAY_SIZE(handle))
+                       return -ENXIO;
+
+               switch (cmd) {
+               case ND_CMD_GET_CONFIG_SIZE:
+                       rc = nfit_test_cmd_get_config_size(buf, buf_len);
                        break;
-       if (i >= ARRAY_SIZE(handle))
-               return -ENXIO;
+               case ND_CMD_GET_CONFIG_DATA:
+                       rc = nfit_test_cmd_get_config_data(buf, buf_len,
+                               t->label[i]);
+                       break;
+               case ND_CMD_SET_CONFIG_DATA:
+                       rc = nfit_test_cmd_set_config_data(buf, buf_len,
+                               t->label[i]);
+                       break;
+               default:
+                       return -ENOTTY;
+               }
+       } else {
+               if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask))
+                       return -ENOTTY;
 
-       switch (cmd) {
-       case ND_CMD_GET_CONFIG_SIZE: {
-               struct nd_cmd_get_config_size *nd_cmd = buf;
-
-               if (buf_len < sizeof(*nd_cmd))
-                       return -EINVAL;
-               nd_cmd->status = 0;
-               nd_cmd->config_size = LABEL_SIZE;
-               nd_cmd->max_xfer = SZ_4K;
-               rc = 0;
-               break;
-       }
-       case ND_CMD_GET_CONFIG_DATA: {
-               struct nd_cmd_get_config_data_hdr *nd_cmd = buf;
-               unsigned int len, offset = nd_cmd->in_offset;
-
-               if (buf_len < sizeof(*nd_cmd))
-                       return -EINVAL;
-               if (offset >= LABEL_SIZE)
-                       return -EINVAL;
-               if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
-                       return -EINVAL;
-
-               nd_cmd->status = 0;
-               len = min(nd_cmd->in_length, LABEL_SIZE - offset);
-               memcpy(nd_cmd->out_buf, t->label[i] + offset, len);
-               rc = buf_len - sizeof(*nd_cmd) - len;
-               break;
-       }
-       case ND_CMD_SET_CONFIG_DATA: {
-               struct nd_cmd_set_config_hdr *nd_cmd = buf;
-               unsigned int len, offset = nd_cmd->in_offset;
-               u32 *status;
-
-               if (buf_len < sizeof(*nd_cmd))
-                       return -EINVAL;
-               if (offset >= LABEL_SIZE)
-                       return -EINVAL;
-               if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
-                       return -EINVAL;
-
-               status = buf + nd_cmd->in_length + sizeof(*nd_cmd);
-               *status = 0;
-               len = min(nd_cmd->in_length, LABEL_SIZE - offset);
-               memcpy(t->label[i] + offset, nd_cmd->in_buf, len);
-               rc = buf_len - sizeof(*nd_cmd) - (len + 4);
-               break;
-       }
-       default:
-               return -ENOTTY;
+               switch (cmd) {
+               case ND_CMD_ARS_CAP:
+                       rc = nfit_test_cmd_ars_cap(buf, buf_len);
+                       break;
+               case ND_CMD_ARS_START:
+                       rc = nfit_test_cmd_ars_start(buf, buf_len);
+                       break;
+               case ND_CMD_ARS_STATUS:
+                       rc = nfit_test_cmd_ars_status(buf, buf_len);
+                       break;
+               default:
+                       return -ENOTTY;
+               }
        }
 
        return rc;
@@ -876,6 +954,9 @@ static void nfit_test0_setup(struct nfit_test *t)
        set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en);
        set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
        set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
+       set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en);
+       set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en);
+       set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en);
        nd_desc = &acpi_desc->nd_desc;
        nd_desc->ndctl = nfit_test_ctl;
 }
@@ -948,9 +1029,13 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
 
        lane = nd_region_acquire_lane(nd_region);
        if (rw)
-               memcpy(mmio->base + dpa, iobuf, len);
-       else
-               memcpy(iobuf, mmio->base + dpa, len);
+               memcpy(mmio->addr.base + dpa, iobuf, len);
+       else {
+               memcpy(iobuf, mmio->addr.base + dpa, len);
+
+               /* give us some coverage of the mmio_flush_range() API */
+               mmio_flush_range(mmio->addr.base + dpa, len);
+       }
        nd_region_release_lane(nd_region, lane);
 
        return 0;