diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 65b2e3f..f295049 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -33,6 +33,7 @@
 #include "xfs_bmap_util.h"
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
+#include "xfs_reflink.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -76,6 +77,9 @@ xfs_inode_alloc(
        ip->i_mount = mp;
        memset(&ip->i_imap, 0, sizeof(struct xfs_imap));
        ip->i_afp = NULL;
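+       /* No CoW fork yet; one is set up later if this inode needs it. */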
+       ip->i_cowfp = NULL;
+       ip->i_cnextents = 0;
+       ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
        memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
        ip->i_flags = 0;
        ip->i_delayed_blks = 0;
@@ -101,6 +105,8 @@ xfs_inode_free_callback(
 
        if (ip->i_afp)
                xfs_idestroy_fork(ip, XFS_ATTR_FORK);
+       if (ip->i_cowfp)
+               xfs_idestroy_fork(ip, XFS_COW_FORK);
 
        if (ip->i_itemp) {
                ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL));
@@ -787,6 +793,33 @@ xfs_eofblocks_worker(
        xfs_queue_eofblocks(mp);
 }
 
+/*
+ * Background scanning to trim preallocated CoW space. This is queued
+ * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
+ * (We'll just piggyback on the post-EOF prealloc space workqueue.)
+ */
+STATIC void
+xfs_queue_cowblocks(
+       struct xfs_mount *mp)
+{
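+       /*
+        * The perag tree is walked under rcu_read_lock(); only requeue
+        * the delayed work while at least one AG still carries the
+        * COWBLOCKS tag, so the worker stops rescheduling once idle.
+        */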
+       rcu_read_lock();
+       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_COWBLOCKS_TAG))
+               queue_delayed_work(mp->m_eofblocks_workqueue,
+                                  &mp->m_cowblocks_work,
+                                  msecs_to_jiffies(xfs_cowb_secs * 1000));
+       rcu_read_unlock();
+}
+
+void
+xfs_cowblocks_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                               struct xfs_mount, m_cowblocks_work);
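+
+       /* Trim stale CoW reservations, then rearm the delayed work. */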
+       xfs_icache_free_cowblocks(mp, NULL);
+       xfs_queue_cowblocks(mp);
+}
+
 int
 xfs_inode_ag_iterator(
        struct xfs_mount        *mp,
@@ -1343,18 +1376,30 @@ xfs_inode_free_eofblocks(
        return ret;
 }
 
-int
-xfs_icache_free_eofblocks(
+static int
+__xfs_icache_free_eofblocks(
        struct xfs_mount        *mp,
-       struct xfs_eofblocks    *eofb)
+       struct xfs_eofblocks    *eofb,
+       int                     (*execute)(struct xfs_inode *ip, int flags,
+                                          void *args),
+       int                     tag)
 {
        int flags = SYNC_TRYLOCK;
 
        if (eofb && (eofb->eof_flags & XFS_EOF_FLAGS_SYNC))
                flags = SYNC_WAIT;
 
-       return xfs_inode_ag_iterator_tag(mp, xfs_inode_free_eofblocks, flags,
-                                        eofb, XFS_ICI_EOFBLOCKS_TAG);
+       return xfs_inode_ag_iterator_tag(mp, execute, flags,
+                                        eofb, tag);
+}
+
+int
+xfs_icache_free_eofblocks(
+       struct xfs_mount        *mp,
+       struct xfs_eofblocks    *eofb)
+{
+       return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_eofblocks,
+                       XFS_ICI_EOFBLOCKS_TAG);
 }
 
 /*
@@ -1363,9 +1408,11 @@ xfs_icache_free_eofblocks(
  * failure. We make a best effort by including each quota under low free space
  * conditions (less than 1% free space) in the scan.
  */
-int
-xfs_inode_free_quota_eofblocks(
-       struct xfs_inode *ip)
+static int
+__xfs_inode_free_quota_eofblocks(
+       struct xfs_inode        *ip,
+       int                     (*execute)(struct xfs_mount *mp,
+                                          struct xfs_eofblocks *eofb))
 {
        int scan = 0;
        struct xfs_eofblocks eofb = {0};
@@ -1401,14 +1448,25 @@ xfs_inode_free_quota_eofblocks(
        }
 
        if (scan)
-               xfs_icache_free_eofblocks(ip->i_mount, &eofb);
+               execute(ip->i_mount, &eofb);
 
        return scan;
 }
 
-void
-xfs_inode_set_eofblocks_tag(
-       xfs_inode_t     *ip)
+int
+xfs_inode_free_quota_eofblocks(
+       struct xfs_inode *ip)
+{
+       return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
+}
+
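+/*
+ * Tag an inode in the per-AG and per-mount radix trees.  This helper is
+ * shared by the EOFBLOCKS and COWBLOCKS scanners: @execute kicks off the
+ * relevant background worker and @set_tp fires the matching tracepoint
+ * the first time the tag is set in an AG.
+ */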
+static void
+__xfs_inode_set_eofblocks_tag(
+       xfs_inode_t     *ip,
+       void            (*execute)(struct xfs_mount *mp),
+       void            (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
+                                 int error, unsigned long caller_ip),
+       int             tag)
 {
        struct xfs_mount *mp = ip->i_mount;
        struct xfs_perag *pag;
@@ -1426,26 +1484,22 @@ xfs_inode_set_eofblocks_tag(
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
        spin_lock(&pag->pag_ici_lock);
-       trace_xfs_inode_set_eofblocks_tag(ip);
 
-       tagged = radix_tree_tagged(&pag->pag_ici_root,
-                                  XFS_ICI_EOFBLOCKS_TAG);
+       tagged = radix_tree_tagged(&pag->pag_ici_root, tag);
        radix_tree_tag_set(&pag->pag_ici_root,
-                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-                          XFS_ICI_EOFBLOCKS_TAG);
+                          XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag);
        if (!tagged) {
                /* propagate the eofblocks tag up into the perag radix tree */
                spin_lock(&ip->i_mount->m_perag_lock);
                radix_tree_tag_set(&ip->i_mount->m_perag_tree,
                                   XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                                  XFS_ICI_EOFBLOCKS_TAG);
+                                  tag);
                spin_unlock(&ip->i_mount->m_perag_lock);
 
                /* kick off background trimming */
-               xfs_queue_eofblocks(ip->i_mount);
+               execute(ip->i_mount);
 
-               trace_xfs_perag_set_eofblocks(ip->i_mount, pag->pag_agno,
-                                             -1, _RET_IP_);
+               set_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_);
        }
 
        spin_unlock(&pag->pag_ici_lock);
@@ -1453,8 +1507,21 @@ xfs_inode_set_eofblocks_tag(
 }
 
 void
-xfs_inode_clear_eofblocks_tag(
+xfs_inode_set_eofblocks_tag(
        xfs_inode_t     *ip)
+{
+       trace_xfs_inode_set_eofblocks_tag(ip);
+       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks,
+                       trace_xfs_perag_set_eofblocks,
+                       XFS_ICI_EOFBLOCKS_TAG);
+}
+
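+/*
+ * Clear a tag in the per-AG radix tree and, once the last tagged inode
+ * in that AG is gone, in the per-mount tree as well.  Shared by the
+ * EOFBLOCKS and COWBLOCKS scanners; @clear_tp fires the matching
+ * tracepoint when the per-AG tag is cleared.
+ */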
+static void
+__xfs_inode_clear_eofblocks_tag(
+       xfs_inode_t     *ip,
+       void            (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
+                                   int error, unsigned long caller_ip),
+       int             tag)
 {
        struct xfs_mount *mp = ip->i_mount;
        struct xfs_perag *pag;
@@ -1465,23 +1532,141 @@ xfs_inode_clear_eofblocks_tag(
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
        spin_lock(&pag->pag_ici_lock);
-       trace_xfs_inode_clear_eofblocks_tag(ip);
 
        radix_tree_tag_clear(&pag->pag_ici_root,
-                            XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-                            XFS_ICI_EOFBLOCKS_TAG);
-       if (!radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_EOFBLOCKS_TAG)) {
+                            XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), tag);
+       if (!radix_tree_tagged(&pag->pag_ici_root, tag)) {
                /* clear the eofblocks tag from the perag radix tree */
                spin_lock(&ip->i_mount->m_perag_lock);
                radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
                                     XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-                                    XFS_ICI_EOFBLOCKS_TAG);
+                                    tag);
                spin_unlock(&ip->i_mount->m_perag_lock);
-               trace_xfs_perag_clear_eofblocks(ip->i_mount, pag->pag_agno,
-                                              -1, _RET_IP_);
+               clear_tp(ip->i_mount, pag->pag_agno, -1, _RET_IP_);
        }
 
        spin_unlock(&pag->pag_ici_lock);
        xfs_perag_put(pag);
 }
 
+void
+xfs_inode_clear_eofblocks_tag(
+       xfs_inode_t     *ip)
+{
+       trace_xfs_inode_clear_eofblocks_tag(ip);
+       return __xfs_inode_clear_eofblocks_tag(ip,
+                       trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
+}
+
+/*
+ * Automatic CoW Reservation Freeing
+ *
+ * These functions automatically garbage collect leftover CoW reservations
+ * that were made on behalf of a cowextsize hint when we start to run out
+ * of quota or when the reservations sit around for too long.  If the file
+ * has dirty pages or is undergoing writeback, its CoW reservations will
+ * be retained.
+ *
+ * The actual garbage collection piggybacks off the same code that runs
+ * the speculative EOF preallocation garbage collector.
+ */
+STATIC int
+xfs_inode_free_cowblocks(
+       struct xfs_inode        *ip,
+       int                     flags,
+       void                    *args)
+{
+       int ret;
+       struct xfs_eofblocks *eofb = args;
+       bool need_iolock = true;
+       int match;
+
+       ASSERT(!eofb || (eofb && eofb->eof_scan_owner != 0));
+
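+       /*
+        * If there are no real (allocated) CoW mappings left, the tag is
+        * stale; clear it so the scanner stops revisiting this inode.
+        */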
+       if (!xfs_reflink_has_real_cow_blocks(ip)) {
+               trace_xfs_inode_free_cowblocks_invalid(ip);
+               xfs_inode_clear_cowblocks_tag(ip);
+               return 0;
+       }
+
+       /*
+        * If the mapping is dirty or under writeback we cannot touch the
+        * CoW fork.  Leave it alone if we're in the midst of a directio.
+        */
+       if (mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
+           mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
+           atomic_read(&VFS_I(ip)->i_dio_count))
+               return 0;
+
+       if (eofb) {
+               if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
+                       match = xfs_inode_match_id_union(ip, eofb);
+               else
+                       match = xfs_inode_match_id(ip, eofb);
+               if (!match)
+                       return 0;
+
+               /* skip the inode if the file size is too small */
+               if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
+                   XFS_ISIZE(ip) < eofb->eof_min_file_size)
+                       return 0;
+
+               /*
+                * A scan owner implies we already hold the iolock. Skip it in
+                * xfs_free_eofblocks() to avoid deadlock. This also eliminates
+                * the possibility of EAGAIN being returned.
+                */
+               if (eofb->eof_scan_owner == ip->i_ino)
+                       need_iolock = false;
+       }
+
+       /*
+        * Free the CoW blocks.  The IOLOCK nests outside the MMAPLOCK,
+        * so take them in that order and drop them in reverse.
+        */
+       if (need_iolock) {
+               xfs_ilock(ip, XFS_IOLOCK_EXCL);
+               xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       }
+
+       ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF);
+
+       if (need_iolock) {
+               xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
+               xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       }
+
+       return ret;
+}
+
+int
+xfs_icache_free_cowblocks(
+       struct xfs_mount        *mp,
+       struct xfs_eofblocks    *eofb)
+{
+       return __xfs_icache_free_eofblocks(mp, eofb, xfs_inode_free_cowblocks,
+                       XFS_ICI_COWBLOCKS_TAG);
+}
+
+int
+xfs_inode_free_quota_cowblocks(
+       struct xfs_inode *ip)
+{
+       return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_cowblocks);
+}
+
+void
+xfs_inode_set_cowblocks_tag(
+       xfs_inode_t     *ip)
+{
+       trace_xfs_inode_set_cowblocks_tag(ip);
+       return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
+                       trace_xfs_perag_set_cowblocks,
+                       XFS_ICI_COWBLOCKS_TAG);
+}
+
+void
+xfs_inode_clear_cowblocks_tag(
+       xfs_inode_t     *ip)
+{
+       trace_xfs_inode_clear_cowblocks_tag(ip);
+       return __xfs_inode_clear_eofblocks_tag(ip,
+                       trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
+}