Merge branch 'perf/urgent' into perf/core, to pick up fixes

author Ingo Molnar <mingo@kernel.org>
Thu, 4 Feb 2016 07:57:44 +0000 (08:57 +0100)
committer Ingo Molnar <mingo@kernel.org>
Thu, 4 Feb 2016 07:57:44 +0000 (08:57 +0100)
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt

index 65b3eac..e8d25e7 100644 (file)
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -843,6 +843,10 @@ PAGE_SIZE multiple when read back.
                 Amount of memory used to cache filesystem data,
                 including tmpfs and shared memory.
  
+         sock
+
+               Amount of memory used in network transmission buffers
+
           file_mapped
  
                 Amount of cached filesystem data mapped with mmap()
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt

index fde9fd0..843b045 100644 (file)
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -240,8 +240,8 @@ Table 1-2: Contents of the status files (as of 4.1)
   RssFile                     size of resident file mappings
   RssShmem                    size of resident shmem memory (includes SysV shm,
                               mapping of tmpfs and shared anonymous mappings)
- VmData                      size of data, stack, and text segments
- VmStk                       size of data, stack, and text segments
+ VmData                      size of private data segments
+ VmStk                       size of stack segments
   VmExe                       size of text segment
   VmLib                       size of shared library code
   VmPTE                       size of page table entries
@@ -356,7 +356,7 @@ address           perms offset  dev   inode      pathname
  a7cb1000-a7cb2000 ---p 00000000 00:00 0
  a7cb2000-a7eb2000 rw-p 00000000 00:00 0
  a7eb2000-a7eb3000 ---p 00000000 00:00 0
-a7eb3000-a7ed5000 rw-p 00000000 00:00 0          [stack:1001]
+a7eb3000-a7ed5000 rw-p 00000000 00:00 0
  a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
  a8008000-a800a000 r--p 00133000 03:00 4222       /lib/libc.so.6
  a800a000-a800b000 rw-p 00135000 03:00 4222       /lib/libc.so.6
@@ -388,7 +388,6 @@ is not associated with a file:
  
   [heap]                   = the heap of the program
   [stack]                  = the stack of the main process
- [stack:1001]             = the stack of the thread with tid 1001
   [vdso]                   = the "virtual dynamic shared object",
                              the kernel system call handler
  
@@ -396,10 +395,8 @@ is not associated with a file:
  
  The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
  of the individual tasks of a process. In this file you will see a mapping marked
-as [stack] if that task sees it as a stack. This is a key difference from the
-content of /proc/PID/maps, where you will see all mappings that are being used
-as stack by all of those tasks. Hence, for the example above, the task-level
-map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
+as [stack] if that task sees it as a stack. Hence, for the example above, the
+task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
  
  08048000-08049000 r-xp 00000000 03:00 8312       /opt/test
  08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index 87d40a7..551ecf0 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1496,6 +1496,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         could change it dynamically, usually by
                         /sys/module/printk/parameters/ignore_loglevel.
  
+       ignore_rlimit_data
+                       Ignore RLIMIT_DATA setting for data mappings,
+                       print warning at first misuse.  Can be changed via
+                       /sys/module/kernel/parameters/ignore_rlimit_data.
+
         ihash_entries=  [KNL]
                         Set number of hash buckets for inode cache.
  
diff --git a/MAINTAINERS b/MAINTAINERS

index f678c37..24c5b9a 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -967,6 +967,8 @@ M:  Rob Herring <robh@kernel.org>
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:     Maintained
  F:     arch/arm/mach-highbank/
+F:     arch/arm/boot/dts/highbank.dts
+F:     arch/arm/boot/dts/ecx-*.dts*
  
  ARM/CAVIUM NETWORKS CNS3XXX MACHINE SUPPORT
  M:     Krzysztof Halasa <khalasa@piap.pl>
@@ -1042,6 +1044,7 @@ M:        Barry Song <baohua@kernel.org>
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
  S:     Maintained
+F:     arch/arm/boot/dts/prima2*
  F:     arch/arm/mach-prima2/
  F:     drivers/clk/sirf/
  F:     drivers/clocksource/timer-prima2.c
@@ -1143,6 +1146,10 @@ W:       http://www.hisilicon.com
  S:     Supported
  T:     git git://github.com/hisilicon/linux-hisi.git
  F:     arch/arm/mach-hisi/
+F:     arch/arm/boot/dts/hi3*
+F:     arch/arm/boot/dts/hip*
+F:     arch/arm/boot/dts/hisi*
+F:     arch/arm64/boot/dts/hisilicon/
  
  ARM/HP JORNADA 7XX MACHINE SUPPORT
  M:     Kristoffer Ericson <kristoffer.ericson@gmail.com>
@@ -1219,6 +1226,7 @@ M:        Santosh Shilimkar <ssantosh@kernel.org>
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:     Maintained
  F:     arch/arm/mach-keystone/
+F:     arch/arm/boot/dts/k2*
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
  
  ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK
@@ -1287,6 +1295,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:     Maintained
  F:     arch/arm/mach-berlin/
  F:     arch/arm/boot/dts/berlin*
+F:     arch/arm64/boot/dts/marvell/berlin*
  
  
  ARM/Marvell Dove/MV78xx0/Orion SOC support
@@ -1425,6 +1434,7 @@ S:        Maintained
  F:     arch/arm/boot/dts/qcom-*.dts
  F:     arch/arm/boot/dts/qcom-*.dtsi
  F:     arch/arm/mach-qcom/
+F:     arch/arm64/boot/dts/qcom/*
  F:     drivers/soc/qcom/
  F:     drivers/tty/serial/msm_serial.h
  F:     drivers/tty/serial/msm_serial.c
@@ -1484,6 +1494,8 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  L:     linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
  S:     Maintained
  F:     arch/arm/boot/dts/s3c*
+F:     arch/arm/boot/dts/s5p*
+F:     arch/arm/boot/dts/samsung*
  F:     arch/arm/boot/dts/exynos*
  F:     arch/arm64/boot/dts/exynos/
  F:     arch/arm/plat-samsung/
@@ -1563,6 +1575,7 @@ S:        Maintained
  F:     arch/arm/mach-socfpga/
  F:     arch/arm/boot/dts/socfpga*
  F:     arch/arm/configs/socfpga_defconfig
+F:     arch/arm64/boot/dts/altera/
  W:     http://www.rocketboards.org
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
  
@@ -1716,7 +1729,7 @@ M:        Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:     Maintained
  F:     arch/arm/boot/dts/vexpress*
-F:     arch/arm64/boot/dts/arm/vexpress*
+F:     arch/arm64/boot/dts/arm/
  F:     arch/arm/mach-vexpress/
  F:     */*/vexpress*
  F:     */*/*/vexpress*
@@ -2343,6 +2356,7 @@ F:        arch/arm/mach-bcm/
  F:     arch/arm/boot/dts/bcm113*
  F:     arch/arm/boot/dts/bcm216*
  F:     arch/arm/boot/dts/bcm281*
+F:     arch/arm64/boot/dts/broadcom/
  F:     arch/arm/configs/bcm_defconfig
  F:     drivers/mmc/host/sdhci-bcm-kona.c
  F:     drivers/clocksource/bcm_kona_timer.c
@@ -8818,6 +8832,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  T:     git git://github.com/hzhuang1/linux.git
  T:     git git://github.com/rjarzmik/linux.git
  S:     Maintained
+F:     arch/arm/boot/dts/pxa*
  F:     arch/arm/mach-pxa/
  F:     drivers/dma/pxa*
  F:     drivers/pcmcia/pxa2xx*
@@ -8847,6 +8862,7 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  T:     git git://github.com/hzhuang1/linux.git
  T:     git git://git.linaro.org/people/ycmiao/pxa-linux.git
  S:     Maintained
+F:     arch/arm/boot/dts/mmp*
  F:     arch/arm/mach-mmp/
  
  PXA MMCI DRIVER
@@ -10291,6 +10307,7 @@ L:      spear-devel@list.st.com
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  W:     http://www.st.com/spear
  S:     Maintained
+F:     arch/arm/boot/dts/spear*
  F:     arch/arm/mach-spear/
  
  SPEAR CLOCK FRAMEWORK SUPPORT
@@ -12133,7 +12150,7 @@ F:      drivers/net/hamradio/*scc.c
  F:     drivers/net/hamradio/z8530.h
  
  ZBUD COMPRESSED PAGE ALLOCATOR
-M:     Seth Jennings <sjennings@variantweb.net>
+M:     Seth Jennings <sjenning@redhat.com>
  L:     linux-mm@kvack.org
  S:     Maintained
  F:     mm/zbud.c
@@ -12188,7 +12205,7 @@ F:      include/linux/zsmalloc.h
  F:     Documentation/vm/zsmalloc.txt
  
  ZSWAP COMPRESSED SWAP CACHING
-M:     Seth Jennings <sjennings@variantweb.net>
+M:     Seth Jennings <sjenning@redhat.com>
  L:     linux-mm@kvack.org
  S:     Maintained
  F:     mm/zswap.c
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c

index 9fda22e..7fddd86 100644 (file)
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -68,6 +68,7 @@
  #include <linux/of_platform.h>
  #include <linux/of_address.h>
  #include <linux/of_irq.h>
+#include <linux/acpi.h>
  
  #ifdef CONFIG_PARISC
  #include <asm/hardware.h>      /* for register_parisc_driver() stuff */
@@ -2054,8 +2055,6 @@ static int hardcode_find_bmc(void)
  
  #ifdef CONFIG_ACPI
  
-#include <linux/acpi.h>
-
  /*
   * Once we get an ACPI failure, we don't try any more, because we go
   * through the tables sequentially.  Once we don't find a table, there
diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c

index 52f708b..d50c701 100644 (file)
--- a/drivers/hwspinlock/hwspinlock_core.c
+++ b/drivers/hwspinlock/hwspinlock_core.c
@@ -313,6 +313,10 @@ int of_hwspin_lock_get_id(struct device_node *np, int index)
                 hwlock = radix_tree_deref_slot(slot);
                 if (unlikely(!hwlock))
                         continue;
+               if (radix_tree_is_indirect_ptr(hwlock)) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
  
                 if (hwlock->bank->dev->of_node == args.np) {
                         ret = 0;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c

index 503ab8b..5e82067 100644 (file)
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1261,7 +1261,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
         }
  
         sfp->mmap_called = 1;
-       vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+       vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
         vma->vm_private_data = sfp;
         vma->vm_ops = &sg_mmap_vm_ops;
         return 0;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c

index 5bcd92d..0cb1abd 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1215,7 +1215,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
                                         hdr->pgio_mirror_idx + 1,
                                         &hdr->pgio_mirror_idx))
                         goto out_eagain;
-               set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+               set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                         &hdr->lseg->pls_layout->plh_flags);
                 pnfs_read_resend_pnfs(hdr);
                 return task->tk_status;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c

index 29898a9..eb37046 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -412,7 +412,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
                                          OP_ILLEGAL, GFP_NOIO);
                 if (!fail_return) {
                         if (ff_layout_has_available_ds(lseg))
-                               set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                               set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                         &lseg->pls_layout->plh_flags);
                         else
                                 pnfs_error_mark_layout_for_return(ino, lseg);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c

index a3592cc..482b6e9 100644 (file)
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -52,9 +52,7 @@ static DEFINE_SPINLOCK(pnfs_spinlock);
   */
  static LIST_HEAD(pnfs_modules_tbl);
  
-static int
-pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
-                      enum pnfs_iomode iomode, bool sync);
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo);
  
  /* Return the registered pnfs layout driver module matching given id */
  static struct pnfs_layoutdriver_type *
@@ -243,6 +241,8 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
  {
         struct inode *inode = lo->plh_inode;
  
+       pnfs_layoutreturn_before_put_layout_hdr(lo);
+
         if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
                 if (!list_empty(&lo->plh_segs))
                         WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
@@ -345,58 +345,6 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
         rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
  }
  
-/* Return true if layoutreturn is needed */
-static bool
-pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
-                       struct pnfs_layout_segment *lseg)
-{
-       struct pnfs_layout_segment *s;
-
-       if (!test_and_clear_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-               return false;
-
-       list_for_each_entry(s, &lo->plh_segs, pls_list)
-               if (s != lseg && test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
-                       return false;
-
-       return true;
-}
-
-static bool
-pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
-{
-       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
-               return false;
-       lo->plh_return_iomode = 0;
-       pnfs_get_layout_hdr(lo);
-       clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags);
-       return true;
-}
-
-static void pnfs_layoutreturn_before_put_lseg(struct pnfs_layout_segment *lseg,
-               struct pnfs_layout_hdr *lo, struct inode *inode)
-{
-       lo = lseg->pls_layout;
-       inode = lo->plh_inode;
-
-       spin_lock(&inode->i_lock);
-       if (pnfs_layout_need_return(lo, lseg)) {
-               nfs4_stateid stateid;
-               enum pnfs_iomode iomode;
-               bool send;
-
-               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
-               iomode = lo->plh_return_iomode;
-               send = pnfs_prepare_layoutreturn(lo);
-               spin_unlock(&inode->i_lock);
-               if (send) {
-                       /* Send an async layoutreturn so we dont deadlock */
-                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
-               }
-       } else
-               spin_unlock(&inode->i_lock);
-}
-
  void
  pnfs_put_lseg(struct pnfs_layout_segment *lseg)
  {
@@ -410,15 +358,8 @@ pnfs_put_lseg(struct pnfs_layout_segment *lseg)
                 atomic_read(&lseg->pls_refcount),
                 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
  
-       /* Handle the case where refcount != 1 */
-       if (atomic_add_unless(&lseg->pls_refcount, -1, 1))
-               return;
-
         lo = lseg->pls_layout;
         inode = lo->plh_inode;
-       /* Do we need a layoutreturn? */
-       if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
-               pnfs_layoutreturn_before_put_lseg(lseg, lo, inode);
  
         if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
                 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags)) {
@@ -937,6 +878,17 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
         rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
  }
  
+static bool
+pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
+{
+       if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+               return false;
+       lo->plh_return_iomode = 0;
+       pnfs_get_layout_hdr(lo);
+       clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+       return true;
+}
+
  static int
  pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid,
                        enum pnfs_iomode iomode, bool sync)
@@ -971,6 +923,48 @@ out:
         return status;
  }
  
+/* Return true if layoutreturn is needed */
+static bool
+pnfs_layout_need_return(struct pnfs_layout_hdr *lo)
+{
+       struct pnfs_layout_segment *s;
+
+       if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               return false;
+
+       /* Defer layoutreturn until all lsegs are done */
+       list_for_each_entry(s, &lo->plh_segs, pls_list) {
+               if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags))
+                       return false;
+       }
+
+       return true;
+}
+
+static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
+{
+       struct inode *inode= lo->plh_inode;
+
+       if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+               return;
+       spin_lock(&inode->i_lock);
+       if (pnfs_layout_need_return(lo)) {
+               nfs4_stateid stateid;
+               enum pnfs_iomode iomode;
+               bool send;
+
+               nfs4_stateid_copy(&stateid, &lo->plh_stateid);
+               iomode = lo->plh_return_iomode;
+               send = pnfs_prepare_layoutreturn(lo);
+               spin_unlock(&inode->i_lock);
+               if (send) {
+                       /* Send an async layoutreturn so we dont deadlock */
+                       pnfs_send_layoutreturn(lo, &stateid, iomode, false);
+               }
+       } else
+               spin_unlock(&inode->i_lock);
+}
+
  /*
   * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
   * when the layout segment list is empty.
@@ -1091,7 +1085,7 @@ bool pnfs_roc(struct inode *ino)
  
         nfs4_stateid_copy(&stateid, &lo->plh_stateid);
         /* always send layoutreturn if being marked so */
-       if (test_and_clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+       if (test_and_clear_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                    &lo->plh_flags))
                 layoutreturn = pnfs_prepare_layoutreturn(lo);
  
@@ -1772,7 +1766,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                         pnfs_set_plh_return_iomode(lo, return_range->iomode);
                         if (!mark_lseg_invalid(lseg, tmp_list))
                                 remaining++;
-                       set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
+                       set_bit(NFS_LAYOUT_RETURN_REQUESTED,
                                         &lo->plh_flags);
                 }
         return remaining;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h

index 9f4e2a4..1ac1db5 100644 (file)
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -94,8 +94,8 @@ enum {
         NFS_LAYOUT_RO_FAILED = 0,       /* get ro layout failed stop trying */
         NFS_LAYOUT_RW_FAILED,           /* get rw layout failed stop trying */
         NFS_LAYOUT_BULK_RECALL,         /* bulk recall affecting layout */
-       NFS_LAYOUT_RETURN,              /* Return this layout ASAP */
-       NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */
+       NFS_LAYOUT_RETURN,              /* layoutreturn in progress */
+       NFS_LAYOUT_RETURN_REQUESTED,    /* Return this layout ASAP */
         NFS_LAYOUT_INVALID_STID,        /* layout stateid id is invalid */
         NFS_LAYOUT_FIRST_LAYOUTGET,     /* Serialize first layoutget */
  };
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c

index a3cc6d2..a76b9ea 100644 (file)
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1254,15 +1254,15 @@ static const struct file_operations o2hb_debug_fops = {
  
  void o2hb_exit(void)
  {
-       kfree(o2hb_db_livenodes);
-       kfree(o2hb_db_liveregions);
-       kfree(o2hb_db_quorumregions);
-       kfree(o2hb_db_failedregions);
         debugfs_remove(o2hb_debug_failedregions);
         debugfs_remove(o2hb_debug_quorumregions);
         debugfs_remove(o2hb_debug_liveregions);
         debugfs_remove(o2hb_debug_livenodes);
         debugfs_remove(o2hb_debug_dir);
+       kfree(o2hb_db_livenodes);
+       kfree(o2hb_db_liveregions);
+       kfree(o2hb_db_quorumregions);
+       kfree(o2hb_db_failedregions);
  }
  
  static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir,
@@ -1438,13 +1438,15 @@ static void o2hb_region_release(struct config_item *item)
  
         kfree(reg->hr_slots);
  
-       kfree(reg->hr_db_regnum);
-       kfree(reg->hr_db_livenodes);
         debugfs_remove(reg->hr_debug_livenodes);
         debugfs_remove(reg->hr_debug_regnum);
         debugfs_remove(reg->hr_debug_elapsed_time);
         debugfs_remove(reg->hr_debug_pinned);
         debugfs_remove(reg->hr_debug_dir);
+       kfree(reg->hr_db_livenodes);
+       kfree(reg->hr_db_regnum);
+       kfree(reg->hr_debug_elapsed_time);
+       kfree(reg->hr_debug_pinned);
  
         spin_lock(&o2hb_live_lock);
         list_del(&reg->hr_all_item);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c

index 85d16c6..fa95ab2 100644 (file)
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -259,23 +259,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
                                 sizeof(struct proc_maps_private));
  }
  
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-                               struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+                   struct vm_area_struct *vma, int is_pid)
  {
-       struct inode *inode = priv->inode;
-       struct task_struct *task;
-       pid_t ret = 0;
+       int stack = 0;
+
+       if (is_pid) {
+               stack = vma->vm_start <= vma->vm_mm->start_stack &&
+                       vma->vm_end >= vma->vm_mm->start_stack;
+       } else {
+               struct inode *inode = priv->inode;
+               struct task_struct *task;
  
-       rcu_read_lock();
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
-       if (task) {
-               task = task_of_stack(task, vma, is_pid);
+               rcu_read_lock();
+               task = pid_task(proc_pid(inode), PIDTYPE_PID);
                 if (task)
-                       ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+                       stack = vma_is_stack_for_task(vma, task);
+               rcu_read_unlock();
         }
-       rcu_read_unlock();
-
-       return ret;
+       return stack;
  }
  
  static void
@@ -335,8 +341,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
  
         name = arch_vma_name(vma);
         if (!name) {
-               pid_t tid;
-
                 if (!mm) {
                         name = "[vdso]";
                         goto done;
@@ -348,21 +352,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
                         goto done;
                 }
  
-               tid = pid_of_stack(priv, vma, is_pid);
-               if (tid != 0) {
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack)) {
-                               name = "[stack]";
-                       } else {
-                               /* Thread stack in /proc/PID/maps */
-                               seq_pad(m, ' ');
-                               seq_printf(m, "[stack:%d]", tid);
-                       }
-               }
+               if (is_stack(priv, vma, is_pid))
+                       name = "[stack]";
         }
  
  done:
@@ -1552,18 +1543,19 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
  static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
                 unsigned long addr, unsigned long end, struct mm_walk *walk)
  {
+       pte_t huge_pte = huge_ptep_get(pte);
         struct numa_maps *md;
         struct page *page;
  
-       if (!pte_present(*pte))
+       if (!pte_present(huge_pte))
                 return 0;
  
-       page = pte_page(*pte);
+       page = pte_page(huge_pte);
         if (!page)
                 return 0;
  
         md = walk->private;
-       gather_stats(page, md, pte_dirty(*pte), 1);
+       gather_stats(page, md, pte_dirty(huge_pte), 1);
         return 0;
  }
  
@@ -1617,19 +1609,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
                 seq_file_path(m, file, "\n\t= ");
         } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
                 seq_puts(m, " heap");
-       } else {
-               pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
-               if (tid != 0) {
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack))
-                               seq_puts(m, " stack");
-                       else
-                               seq_printf(m, " stack:%d", tid);
-               }
+       } else if (is_stack(proc_priv, vma, is_pid)) {
+               seq_puts(m, " stack");
         }
  
         if (is_vm_hugetlb_page(vma))
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c

index e0d64c9..faacb0c 100644 (file)
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
         return size;
  }
  
-static pid_t pid_of_stack(struct proc_maps_private *priv,
-                               struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+                   struct vm_area_struct *vma, int is_pid)
  {
-       struct inode *inode = priv->inode;
-       struct task_struct *task;
-       pid_t ret = 0;
-
-       rcu_read_lock();
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
-       if (task) {
-               task = task_of_stack(task, vma, is_pid);
+       struct mm_struct *mm = vma->vm_mm;
+       int stack = 0;
+
+       if (is_pid) {
+               stack = vma->vm_start <= mm->start_stack &&
+                       vma->vm_end >= mm->start_stack;
+       } else {
+               struct inode *inode = priv->inode;
+               struct task_struct *task;
+
+               rcu_read_lock();
+               task = pid_task(proc_pid(inode), PIDTYPE_PID);
                 if (task)
-                       ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+                       stack = vma_is_stack_for_task(vma, task);
+               rcu_read_unlock();
         }
-       rcu_read_unlock();
-
-       return ret;
+       return stack;
  }
  
  /*
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
         if (file) {
                 seq_pad(m, ' ');
                 seq_file_path(m, file, "");
-       } else if (mm) {
-               pid_t tid = pid_of_stack(priv, vma, is_pid);
-
-               if (tid != 0) {
-                       seq_pad(m, ' ');
-                       /*
-                        * Thread stack in /proc/PID/task/TID/maps or
-                        * the main process stack.
-                        */
-                       if (!is_pid || (vma->vm_start <= mm->start_stack &&
-                           vma->vm_end >= mm->start_stack))
-                               seq_printf(m, "[stack]");
-                       else
-                               seq_printf(m, "[stack:%d]", tid);
-               }
+       } else if (mm && is_stack(priv, vma, is_pid)) {
+               seq_pad(m, ' ');
+               seq_printf(m, "[stack]");
         }
  
         seq_putc(m, '\n');
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h

index 9ae48d4..792c898 100644 (file)
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -51,7 +51,7 @@ enum mem_cgroup_stat_index {
         MEM_CGROUP_STAT_SWAP,           /* # of pages, swapped out */
         MEM_CGROUP_STAT_NSTATS,
         /* default hierarchy stats */
-       MEMCG_SOCK,
+       MEMCG_SOCK = MEM_CGROUP_STAT_NSTATS,
         MEMCG_NR_STAT,
  };
  
diff --git a/include/linux/mm.h b/include/linux/mm.h

index f1cd22f..516e149 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -201,11 +201,13 @@ extern unsigned int kobjsize(const void *objp);
  #endif
  
  #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK_FLAGS (VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK       VM_GROWSUP
  #else
-#define VM_STACK_FLAGS (VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+#define VM_STACK       VM_GROWSDOWN
  #endif
  
+#define VM_STACK_FLAGS (VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
  /*
   * Special vmas that are non-mergable, non-mlock()able.
   * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
@@ -1341,8 +1343,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma,
                 !vma_growsup(vma->vm_next, addr);
  }
  
-extern struct task_struct *task_of_stack(struct task_struct *task,
-                               struct vm_area_struct *vma, bool in_group);
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t);
  
  extern unsigned long move_page_tables(struct vm_area_struct *vma,
                 unsigned long old_addr, struct vm_area_struct *new_vma,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index d3ebb9d..624b78b 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -424,9 +424,9 @@ struct mm_struct {
         unsigned long total_vm;         /* Total pages mapped */
         unsigned long locked_vm;        /* Pages that have PG_mlocked set */
         unsigned long pinned_vm;        /* Refcount permanently increased */
-       unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED/GROWSDOWN */
-       unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE */
-       unsigned long stack_vm;         /* VM_GROWSUP/DOWN */
+       unsigned long data_vm;          /* VM_WRITE & ~VM_SHARED & ~VM_STACK */
+       unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE & ~VM_STACK */
+       unsigned long stack_vm;         /* VM_STACK */
         unsigned long def_flags;
         unsigned long start_code, end_code, start_data, end_data;
         unsigned long start_brk, brk, start_stack;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index 33bb1b1..7b6c2cf 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -682,6 +682,12 @@ typedef struct pglist_data {
          */
         unsigned long first_deferred_pfn;
  #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spinlock_t split_queue_lock;
+       struct list_head split_queue;
+       unsigned long split_queue_len;
+#endif
  } pg_data_t;
  
  #define node_present_pages(nid)        (NODE_DATA(nid)->node_present_pages)
diff --git a/include/linux/of.h b/include/linux/of.h

index dd10626..dc6e396 100644 (file)
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -929,7 +929,7 @@ static inline int of_get_available_child_count(const struct device_node *np)
         return num;
  }
  
-#ifdef CONFIG_OF
+#if defined(CONFIG_OF) && !defined(MODULE)
  #define _OF_DECLARE(table, name, compat, fn, fn_type)                  \
         static const struct of_device_id __of_table_##name              \
                 __used __section(__##table##_of_table)                  \
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h

index 7c88ad1..00b17c5 100644 (file)
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -378,6 +378,22 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
  void **radix_tree_next_chunk(struct radix_tree_root *root,
                              struct radix_tree_iter *iter, unsigned flags);
  
+/**
+ * radix_tree_iter_retry - retry this chunk of the iteration
+ * @iter:      iterator state
+ *
+ * If we iterate over a tree protected only by the RCU lock, a race
+ * against deletion or creation may result in seeing a slot for which
+ * radix_tree_deref_retry() returns true.  If so, call this function
+ * and continue the iteration.
+ */
+static inline __must_check
+void **radix_tree_iter_retry(struct radix_tree_iter *iter)
+{
+       iter->next_index = iter->index;
+       return NULL;
+}
+
  /**
   * radix_tree_chunk_size - get current chunk size
   *
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c

index dda9e67..202df6c 100644 (file)
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -125,6 +125,13 @@ check_stack(unsigned long ip, unsigned long *stack)
                         break;
         }
  
+       /*
+        * Some archs may not have the passed in ip in the dump.
+        * If that happens, we need to show everything.
+        */
+       if (i == stack_trace_max.nr_entries)
+               i = 0;
+
         /*
          * Now find where in the stack these are.
          */
diff --git a/lib/radix-tree.c b/lib/radix-tree.c

index fcf5d98..6b79e90 100644 (file)
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1019,9 +1019,13 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                 return 0;
  
         radix_tree_for_each_slot(slot, root, &iter, first_index) {
-               results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+               results[ret] = rcu_dereference_raw(*slot);
                 if (!results[ret])
                         continue;
+               if (radix_tree_is_indirect_ptr(results[ret])) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
                 if (++ret == max_items)
                         break;
         }
@@ -1098,9 +1102,13 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
                 return 0;
  
         radix_tree_for_each_tagged(slot, root, &iter, first_index, tag) {
-               results[ret] = indirect_to_ptr(rcu_dereference_raw(*slot));
+               results[ret] = rcu_dereference_raw(*slot);
                 if (!results[ret])
                         continue;
+               if (radix_tree_is_indirect_ptr(results[ret])) {
+                       slot = radix_tree_iter_retry(&iter);
+                       continue;
+               }
                 if (++ret == max_items)
                         break;
         }
diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c

index 98866a7..25b5cbf 100644 (file)
--- a/lib/test-string_helpers.c
+++ b/lib/test-string_helpers.c
@@ -327,36 +327,67 @@ out:
  }
  
  #define string_get_size_maxbuf 16
-#define test_string_get_size_one(size, blk_size, units, exp_result)            \
+#define test_string_get_size_one(size, blk_size, exp_result10, exp_result2)    \
         do {                                                                   \
-               BUILD_BUG_ON(sizeof(exp_result) >= string_get_size_maxbuf);    \
-               __test_string_get_size((size), (blk_size), (units),            \
-                                      (exp_result));                          \
+               BUILD_BUG_ON(sizeof(exp_result10) >= string_get_size_maxbuf);  \
+               BUILD_BUG_ON(sizeof(exp_result2) >= string_get_size_maxbuf);   \
+               __test_string_get_size((size), (blk_size), (exp_result10),     \
+                                      (exp_result2));                         \
         } while (0)
  
  
-static __init void __test_string_get_size(const u64 size, const u64 blk_size,
-                                         const enum string_size_units units,
-                                         const char *exp_result)
+static __init void test_string_get_size_check(const char *units,
+                                             const char *exp,
+                                             char *res,
+                                             const u64 size,
+                                             const u64 blk_size)
  {
-       char buf[string_get_size_maxbuf];
-
-       string_get_size(size, blk_size, units, buf, sizeof(buf));
-       if (!memcmp(buf, exp_result, strlen(exp_result) + 1))
+       if (!memcmp(res, exp, strlen(exp) + 1))
                 return;
  
-       buf[sizeof(buf) - 1] = '\0';
-       pr_warn("Test 'test_string_get_size_one' failed!\n");
-       pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %d\n",
+       res[string_get_size_maxbuf - 1] = '\0';
+
+       pr_warn("Test 'test_string_get_size' failed!\n");
+       pr_warn("string_get_size(size = %llu, blk_size = %llu, units = %s)\n",
                 size, blk_size, units);
-       pr_warn("expected: '%s', got '%s'\n", exp_result, buf);
+       pr_warn("expected: '%s', got '%s'\n", exp, res);
+}
+
+static __init void __test_string_get_size(const u64 size, const u64 blk_size,
+                                         const char *exp_result10,
+                                         const char *exp_result2)
+{
+       char buf10[string_get_size_maxbuf];
+       char buf2[string_get_size_maxbuf];
+
+       string_get_size(size, blk_size, STRING_UNITS_10, buf10, sizeof(buf10));
+       string_get_size(size, blk_size, STRING_UNITS_2, buf2, sizeof(buf2));
+
+       test_string_get_size_check("STRING_UNITS_10", exp_result10, buf10,
+                                  size, blk_size);
+
+       test_string_get_size_check("STRING_UNITS_2", exp_result2, buf2,
+                                  size, blk_size);
  }
  
  static __init void test_string_get_size(void)
  {
-       test_string_get_size_one(16384, 512, STRING_UNITS_2, "8.00 MiB");
-       test_string_get_size_one(8192, 4096, STRING_UNITS_10, "32.7 MB");
-       test_string_get_size_one(1, 512, STRING_UNITS_10, "512 B");
+       /* small values */
+       test_string_get_size_one(0, 512, "0 B", "0 B");
+       test_string_get_size_one(1, 512, "512 B", "512 B");
+       test_string_get_size_one(1100, 1, "1.10 kB", "1.07 KiB");
+
+       /* normal values */
+       test_string_get_size_one(16384, 512, "8.39 MB", "8.00 MiB");
+       test_string_get_size_one(500118192, 512, "256 GB", "238 GiB");
+       test_string_get_size_one(8192, 4096, "33.6 MB", "32.0 MiB");
+
+       /* weird block sizes */
+       test_string_get_size_one(3000, 1900, "5.70 MB", "5.44 MiB");
+
+       /* huge values */
+       test_string_get_size_one(U64_MAX, 4096, "75.6 ZB", "64.0 ZiB");
+       test_string_get_size_one(4096, U64_MAX, "75.6 ZB", "64.0 ZiB");
  }
  
  static int __init test_string_helpers_init(void)
diff --git a/mm/gup.c b/mm/gup.c

index b64a361..7bf19ff 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -430,10 +430,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
                          * Anon pages in shared mappings are surprising: now
                          * just reject it.
                          */
-                       if (!is_cow_mapping(vm_flags)) {
-                               WARN_ON_ONCE(vm_flags & VM_MAYWRITE);
+                       if (!is_cow_mapping(vm_flags))
                                 return -EFAULT;
-                       }
                 }
         } else if (!(vm_flags & VM_READ)) {
                 if (!(gup_flags & FOLL_FORCE))
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index fd3a07b..36c0701 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -138,9 +138,6 @@ static struct khugepaged_scan khugepaged_scan = {
         .mm_head = LIST_HEAD_INIT(khugepaged_scan.mm_head),
  };
  
-static DEFINE_SPINLOCK(split_queue_lock);
-static LIST_HEAD(split_queue);
-static unsigned long split_queue_len;
  static struct shrinker deferred_split_shrinker;
  
  static void set_recommended_min_free_kbytes(void)
@@ -861,7 +858,8 @@ static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                 return false;
         entry = mk_pmd(zero_page, vma->vm_page_prot);
         entry = pmd_mkhuge(entry);
-       pgtable_trans_huge_deposit(mm, pmd, pgtable);
+       if (pgtable)
+               pgtable_trans_huge_deposit(mm, pmd, pgtable);
         set_pmd_at(mm, haddr, pmd, entry);
         atomic_long_inc(&mm->nr_ptes);
         return true;
@@ -1039,13 +1037,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         spinlock_t *dst_ptl, *src_ptl;
         struct page *src_page;
         pmd_t pmd;
-       pgtable_t pgtable;
+       pgtable_t pgtable = NULL;
         int ret;
  
-       ret = -ENOMEM;
-       pgtable = pte_alloc_one(dst_mm, addr);
-       if (unlikely(!pgtable))
-               goto out;
+       if (!vma_is_dax(vma)) {
+               ret = -ENOMEM;
+               pgtable = pte_alloc_one(dst_mm, addr);
+               if (unlikely(!pgtable))
+                       goto out;
+       }
  
         dst_ptl = pmd_lock(dst_mm, dst_pmd);
         src_ptl = pmd_lockptr(src_mm, src_pmd);
@@ -1076,7 +1076,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 goto out_unlock;
         }
  
-       if (pmd_trans_huge(pmd)) {
+       if (!vma_is_dax(vma)) {
                 /* thp accounting separate from pmd_devmap accounting */
                 src_page = pmd_page(pmd);
                 VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
@@ -3358,6 +3358,7 @@ int total_mapcount(struct page *page)
  int split_huge_page_to_list(struct page *page, struct list_head *list)
  {
         struct page *head = compound_head(page);
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(head));
         struct anon_vma *anon_vma;
         int count, mapcount, ret;
         bool mlocked;
@@ -3401,19 +3402,19 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                 lru_add_drain();
  
         /* Prevent deferred_split_scan() touching ->_count */
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
         count = page_count(head);
         mapcount = total_mapcount(head);
         if (!mapcount && count == 1) {
                 if (!list_empty(page_deferred_list(head))) {
-                       split_queue_len--;
+                       pgdata->split_queue_len--;
                         list_del(page_deferred_list(head));
                 }
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                 __split_huge_page(page, list);
                 ret = 0;
         } else if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                 pr_alert("total_mapcount: %u, page_count(): %u\n",
                                 mapcount, count);
                 if (PageTail(page))
@@ -3421,7 +3422,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                 dump_page(page, "total_mapcount(head) > 0");
                 BUG();
         } else {
-               spin_unlock_irqrestore(&split_queue_lock, flags);
+               spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
                 unfreeze_page(anon_vma, head);
                 ret = -EBUSY;
         }
@@ -3436,64 +3437,65 @@ out:
  
  void free_transhuge_page(struct page *page)
  {
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
         unsigned long flags;
  
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
         if (!list_empty(page_deferred_list(page))) {
-               split_queue_len--;
+               pgdata->split_queue_len--;
                 list_del(page_deferred_list(page));
         }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
         free_compound_page(page);
  }
  
  void deferred_split_huge_page(struct page *page)
  {
+       struct pglist_data *pgdata = NODE_DATA(page_to_nid(page));
         unsigned long flags;
  
         VM_BUG_ON_PAGE(!PageTransHuge(page), page);
  
-       spin_lock_irqsave(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
         if (list_empty(page_deferred_list(page))) {
-               list_add_tail(page_deferred_list(page), &split_queue);
-               split_queue_len++;
+               list_add_tail(page_deferred_list(page), &pgdata->split_queue);
+               pgdata->split_queue_len++;
         }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
  }
  
  static unsigned long deferred_split_count(struct shrinker *shrink,
                 struct shrink_control *sc)
  {
-       /*
-        * Split a page from split_queue will free up at least one page,
-        * at most HPAGE_PMD_NR - 1. We don't track exact number.
-        * Let's use HPAGE_PMD_NR / 2 as ballpark.
-        */
-       return ACCESS_ONCE(split_queue_len) * HPAGE_PMD_NR / 2;
+       struct pglist_data *pgdata = NODE_DATA(sc->nid);
+       return ACCESS_ONCE(pgdata->split_queue_len);
  }
  
  static unsigned long deferred_split_scan(struct shrinker *shrink,
                 struct shrink_control *sc)
  {
+       struct pglist_data *pgdata = NODE_DATA(sc->nid);
         unsigned long flags;
         LIST_HEAD(list), *pos, *next;
         struct page *page;
         int split = 0;
  
-       spin_lock_irqsave(&split_queue_lock, flags);
-       list_splice_init(&split_queue, &list);
-
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
         /* Take pin on all head pages to avoid freeing them under us */
         list_for_each_safe(pos, next, &list) {
                 page = list_entry((void *)pos, struct page, mapping);
                 page = compound_head(page);
-               /* race with put_compound_page() */
-               if (!get_page_unless_zero(page)) {
+               if (get_page_unless_zero(page)) {
+                       list_move(page_deferred_list(page), &list);
+               } else {
+                       /* We lost race with put_compound_page() */
                         list_del_init(page_deferred_list(page));
-                       split_queue_len--;
+                       pgdata->split_queue_len--;
                 }
+               if (!--sc->nr_to_scan)
+                       break;
         }
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
  
         list_for_each_safe(pos, next, &list) {
                 page = list_entry((void *)pos, struct page, mapping);
@@ -3505,17 +3507,24 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
                 put_page(page);
         }
  
-       spin_lock_irqsave(&split_queue_lock, flags);
-       list_splice_tail(&list, &split_queue);
-       spin_unlock_irqrestore(&split_queue_lock, flags);
+       spin_lock_irqsave(&pgdata->split_queue_lock, flags);
+       list_splice_tail(&list, &pgdata->split_queue);
+       spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
  
-       return split * HPAGE_PMD_NR / 2;
+       /*
+        * Stop shrinker if we didn't split any page, but the queue is empty.
+        * This can happen if pages were freed under us.
+        */
+       if (!split && list_empty(&pgdata->split_queue))
+               return SHRINK_STOP;
+       return split;
  }
  
  static struct shrinker deferred_split_shrinker = {
         .count_objects = deferred_split_count,
         .scan_objects = deferred_split_scan,
         .seeks = DEFAULT_SEEKS,
+       .flags = SHRINKER_NUMA_AWARE,
  };
  
  #ifdef CONFIG_DEBUG_FS
diff --git a/mm/internal.h b/mm/internal.h

index ed8b5ff..a38a21e 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -216,6 +216,37 @@ static inline bool is_cow_mapping(vm_flags_t flags)
         return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
  }
  
+/*
+ * These three helpers classify VMAs for virtual memory accounting.
+ */
+
+/*
+ * Executable code area - executable, not writable, not stack
+ */
+static inline bool is_exec_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
+}
+
+/*
+ * Stack area - automatically grows in one direction
+ *
+ * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
+ * do_mmap() forbids all other combinations.
+ */
+static inline bool is_stack_mapping(vm_flags_t flags)
+{
+       return (flags & VM_STACK) == VM_STACK;
+}
+
+/*
+ * Data area - private, writable, not stack
+ */
+static inline bool is_data_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
+}
+
  /* mm/util.c */
  void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
                 struct vm_area_struct *prev, struct rb_node *rb_parent);
diff --git a/mm/memory.c b/mm/memory.c

index 93ce379..635451a 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2237,11 +2237,6 @@ static int wp_page_shared(struct mm_struct *mm, struct vm_area_struct *vma,
  
         page_cache_get(old_page);
  
-       /*
-        * Only catch write-faults on shared writable pages,
-        * read-only shared pages can get COWed by
-        * get_user_pages(.write=1, .force=1).
-        */
         if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
                 int tmp;
  
diff --git a/mm/mmap.c b/mm/mmap.c

index 84b1262..cfc0cdc 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -42,6 +42,7 @@
  #include <linux/memory.h>
  #include <linux/printk.h>
  #include <linux/userfaultfd_k.h>
+#include <linux/moduleparam.h>
  
  #include <asm/uaccess.h>
  #include <asm/cacheflush.h>
@@ -69,6 +70,8 @@ const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
  int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
  #endif
  
+static bool ignore_rlimit_data = true;
+core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
  
  static void unmap_region(struct mm_struct *mm,
                 struct vm_area_struct *vma, struct vm_area_struct *prev,
@@ -2982,9 +2985,17 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
         if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
                 return false;
  
-       if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS &
-                               (VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE)
-               return mm->data_vm + npages <= rlimit(RLIMIT_DATA);
+       if (is_data_mapping(flags) &&
+           mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
+               if (ignore_rlimit_data)
+                       pr_warn_once("%s (%d): VmData %lu exceed data ulimit "
+                                    "%lu. Will be forbidden soon.\n",
+                                    current->comm, current->pid,
+                                    (mm->data_vm + npages) << PAGE_SHIFT,
+                                    rlimit(RLIMIT_DATA));
+               else
+                       return false;
+       }
  
         return true;
  }
@@ -2993,11 +3004,11 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
  {
         mm->total_vm += npages;
  
-       if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC)
+       if (is_exec_mapping(flags))
                 mm->exec_vm += npages;
-       else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)))
+       else if (is_stack_mapping(flags))
                 mm->stack_vm += npages;
-       else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
+       else if (is_data_mapping(flags))
                 mm->data_vm += npages;
  }
  
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 63358d9..ea2c4d3 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5209,6 +5209,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
         spin_lock_init(&pgdat->numabalancing_migrate_lock);
         pgdat->numabalancing_migrate_nr_pages = 0;
         pgdat->numabalancing_migrate_next_window = jiffies;
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       spin_lock_init(&pgdat->split_queue_lock);
+       INIT_LIST_HEAD(&pgdat->split_queue);
+       pgdat->split_queue_len = 0;
  #endif
         init_waitqueue_head(&pgdat->kswapd_wait);
         init_waitqueue_head(&pgdat->pfmemalloc_wait);
diff --git a/mm/util.c b/mm/util.c

index c108a65..4fb14ca 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -230,36 +230,11 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
  }
  
  /* Check if the vma is being used as a stack by this task */
-static int vm_is_stack_for_task(struct task_struct *t,
-                               struct vm_area_struct *vma)
+int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t)
  {
         return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t));
  }
  
-/*
- * Check if the vma is being used as a stack.
- * If is_group is non-zero, check in the entire thread group or else
- * just check in the current task. Returns the task_struct of the task
- * that the vma is stack for. Must be called under rcu_read_lock().
- */
-struct task_struct *task_of_stack(struct task_struct *task,
-                               struct vm_area_struct *vma, bool in_group)
-{
-       if (vm_is_stack_for_task(task, vma))
-               return task;
-
-       if (in_group) {
-               struct task_struct *t;
-
-               for_each_thread(task, t) {
-                       if (vm_is_stack_for_task(t, vma))
-                               return t;
-               }
-       }
-
-       return NULL;
-}
-
  #if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT)
  void arch_pick_mmap_layout(struct mm_struct *mm)
  {
diff --git a/mm/vmpressure.c b/mm/vmpressure.c

index 9a6c070..149fdf6 100644 (file)
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -248,9 +248,8 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
  
         if (tree) {
                 spin_lock(&vmpr->sr_lock);
-               vmpr->tree_scanned += scanned;
+               scanned = vmpr->tree_scanned += scanned;
                 vmpr->tree_reclaimed += reclaimed;
-               scanned = vmpr->scanned;
                 spin_unlock(&vmpr->sr_lock);
  
                 if (scanned < vmpressure_win)
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c

index 81a2eb7..05d8158 100644 (file)
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -2068,6 +2068,15 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
                 err = -ENOMEM;
                 goto err_free_queues;
         }
+
+       /*
+        * Since this thread will not be kept in any rbtree nor in a
+        * list, initialize its list node so that at thread__put() the
+        * current thread lifetime assumption is kept and we don't segfault
+        * at list_del_init().
+        */
+       INIT_LIST_HEAD(&pt->unknown_thread->node);
+
         err = thread__set_comm(pt->unknown_thread, "unknown", 0);
         if (err)
                 goto err_delete_thread;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c

index 4f7b0ef..813d9b2 100644 (file)
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -399,6 +399,9 @@ static void tracepoint_error(struct parse_events_error *e, int err,
  {
         char help[BUFSIZ];
  
+       if (!e)
+               return;
+
         /*
          * We get error directly from syscall errno ( > 0),
          * or from encoded pointer's error ( < 0).
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c

index 2be10fb..4ce5c5e 100644 (file)
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -686,8 +686,9 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
                 pf->fb_ops = NULL;
  #if _ELFUTILS_PREREQ(0, 142)
         } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
-                  pf->cfi != NULL) {
-               if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 ||
+                  (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
+               if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
+                    (dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, &frame) != 0)) ||
                     dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
                         pr_warning("Failed to get call frame on 0x%jx\n",
                                    (uintmax_t)pf->addr);
@@ -1015,8 +1016,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
         return DWARF_CB_OK;
  }
  
-/* Find probe points from debuginfo */
-static int debuginfo__find_probes(struct debuginfo *dbg,
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
                                   struct probe_finder *pf)
  {
         struct perf_probe_point *pp = &pf->pev->point;
@@ -1025,27 +1025,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
         Dwarf_Die *diep;
         int ret = 0;
  
-#if _ELFUTILS_PREREQ(0, 142)
-       Elf *elf;
-       GElf_Ehdr ehdr;
-       GElf_Shdr shdr;
-
-       /* Get the call frame information from this dwarf */
-       elf = dwarf_getelf(dbg->dbg);
-       if (elf == NULL)
-               return -EINVAL;
-
-       if (gelf_getehdr(elf, &ehdr) == NULL)
-               return -EINVAL;
-
-       if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
-           shdr.sh_type == SHT_PROGBITS) {
-               pf->cfi = dwarf_getcfi_elf(elf);
-       } else {
-               pf->cfi = dwarf_getcfi(dbg->dbg);
-       }
-#endif
-
         off = 0;
         pf->lcache = intlist__new(NULL);
         if (!pf->lcache)
@@ -1108,6 +1087,39 @@ found:
         return ret;
  }
  
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+                                 struct probe_finder *pf)
+{
+       int ret = 0;
+
+#if _ELFUTILS_PREREQ(0, 142)
+       Elf *elf;
+       GElf_Ehdr ehdr;
+       GElf_Shdr shdr;
+
+       if (pf->cfi_eh || pf->cfi_dbg)
+               return debuginfo__find_probe_location(dbg, pf);
+
+       /* Get the call frame information from this dwarf */
+       elf = dwarf_getelf(dbg->dbg);
+       if (elf == NULL)
+               return -EINVAL;
+
+       if (gelf_getehdr(elf, &ehdr) == NULL)
+               return -EINVAL;
+
+       if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+           shdr.sh_type == SHT_PROGBITS)
+               pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+       pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+#endif
+
+       ret = debuginfo__find_probe_location(dbg, pf);
+       return ret;
+}
+
  struct local_vars_finder {
         struct probe_finder *pf;
         struct perf_probe_arg *args;
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h

index bed8271..0aec770 100644 (file)
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -76,7 +76,10 @@ struct probe_finder {
  
         /* For variable searching */
  #if _ELFUTILS_PREREQ(0, 142)
-       Dwarf_CFI               *cfi;           /* Call Frame Information */
+       /* Call Frame Information from .eh_frame */
+       Dwarf_CFI               *cfi_eh;
+       /* Call Frame Information from .debug_frame */
+       Dwarf_CFI               *cfi_dbg;
  #endif
         Dwarf_Op                *fb_ops;        /* Frame base attribute */
         struct perf_probe_arg   *pvar;          /* Current target variable */
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c

index beeed0b..4d9b481 100644 (file)
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -311,6 +311,16 @@ int perf_stat_process_counter(struct perf_stat_config *config,
  
         aggr->val = aggr->ena = aggr->run = 0;
  
+       /*
+        * The counter's data is calculated every interval,
+        * and the display code shows the average held in
+        * ps->res_stats. Zero the stats in interval mode;
+        * otherwise each interval would show the running
+        * average over the whole run so far.
+        */
+       if (config->interval)
+               init_stats(ps->res_stats);
+
         if (counter->per_pkg)
                 zero_per_pkg(counter);
author	Ingo Molnar <mingo@kernel.org>
	Thu, 4 Feb 2016 07:57:44 +0000 (08:57 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Thu, 4 Feb 2016 07:57:44 +0000 (08:57 +0100)
Documentation/cgroup-v2.txt		patch \| blob \| history
Documentation/filesystems/proc.txt		patch \| blob \| history
Documentation/kernel-parameters.txt		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
drivers/char/ipmi/ipmi_si_intf.c		patch \| blob \| history
drivers/hwspinlock/hwspinlock_core.c		patch \| blob \| history
drivers/scsi/sg.c		patch \| blob \| history
fs/nfs/flexfilelayout/flexfilelayout.c		patch \| blob \| history
fs/nfs/flexfilelayout/flexfilelayoutdev.c		patch \| blob \| history
fs/nfs/pnfs.c		patch \| blob \| history
fs/nfs/pnfs.h		patch \| blob \| history
fs/ocfs2/cluster/heartbeat.c		patch \| blob \| history
fs/proc/task_mmu.c		patch \| blob \| history
fs/proc/task_nommu.c		patch \| blob \| history
include/linux/memcontrol.h		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
include/linux/mm_types.h		patch \| blob \| history
include/linux/mmzone.h		patch \| blob \| history
include/linux/of.h		patch \| blob \| history
include/linux/radix-tree.h		patch \| blob \| history
kernel/trace/trace_stack.c		patch \| blob \| history
lib/radix-tree.c		patch \| blob \| history
lib/test-string_helpers.c		patch \| blob \| history
mm/gup.c		patch \| blob \| history
mm/huge_memory.c		patch \| blob \| history
mm/internal.h		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/mmap.c		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history
mm/util.c		patch \| blob \| history
mm/vmpressure.c		patch \| blob \| history
tools/perf/util/intel-pt.c		patch \| blob \| history
tools/perf/util/parse-events.c		patch \| blob \| history
tools/perf/util/probe-finder.c		patch \| blob \| history
tools/perf/util/probe-finder.h		patch \| blob \| history
tools/perf/util/stat.c		patch \| blob \| history