xfs: reflink extents from one file to another

author Darrick J. Wong <darrick.wong@oracle.com>

Mon, 3 Oct 2016 16:11:40 +0000 (09:11 -0700)

committer Darrick J. Wong <darrick.wong@oracle.com>

Wed, 5 Oct 2016 23:26:05 +0000 (16:26 -0700)
author Darrick J. Wong <darrick.wong@oracle.com>
Mon, 3 Oct 2016 16:11:40 +0000 (09:11 -0700)
committer Darrick J. Wong <darrick.wong@oracle.com>
Wed, 5 Oct 2016 23:26:05 +0000 (16:26 -0700)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c

index 0aac262..c1e98a4 100644 (file)
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -792,3 +792,431 @@ xfs_reflink_recover_cow(
  
         return error;
  }
+
+/*
+ * Reflinking (Block) Ranges of Two Files Together
+ *
+ * First, ensure that the reflink flag is set on both inodes.  The flag is an
+ * optimization to avoid unnecessary refcount btree lookups in the write path.
+ *
+ * Now we can iteratively remap the range of extents (and holes) in src to the
+ * corresponding ranges in dest.  Let drange and srange denote the ranges of
+ * logical blocks in dest and src touched by the reflink operation.
+ *
+ * While the length of drange is greater than zero,
+ *    - Read src's bmbt at the start of srange ("imap")
+ *    - If imap doesn't exist, make imap appear to start at the end of srange
+ *      with zero length.
+ *    - If imap starts before srange, advance imap to start at srange.
+ *    - If imap goes beyond srange, truncate imap to end at the end of srange.
+ *    - Punch (imap start - srange start + imap len) blocks from dest at
+ *      offset (drange start).
+ *    - If imap points to a real range of pblks,
+ *         > Increase the refcount of the imap's pblks
+ *         > Map imap's pblks into dest at the offset
+ *           (drange start + imap start - srange start)
+ *    - Advance drange and srange by (imap start - srange start + imap len)
+ *
+ * Finally, if the reflink made dest longer, update both the in-core and
+ * on-disk file sizes.
+ *
+ * ASCII Art Demonstration:
+ *
+ * Let's say we want to reflink this source file:
+ *
+ * ----SSSSSSS-SSSSS----SSSSSS (src file)
+ *   <-------------------->
+ *
+ * into this destination file:
+ *
+ * --DDDDDDDDDDDDDDDDDDD--DDD (dest file)
+ *        <-------------------->
+ * '-' means a hole, and 'S' and 'D' are written blocks in the src and dest.
+ * Observe that the range has different logical offsets in either file.
+ *
+ * Consider that the first extent in the source file doesn't line up with our
+ * reflink range.  Unmapping  and remapping are separate operations, so we can
+ * unmap more blocks from the destination file than we remap.
+ *
+ * ----SSSSSSS-SSSSS----SSSSSS
+ *   <------->
+ * --DDDDD---------DDDDD--DDD
+ *        <------->
+ *
+ * Now remap the source extent into the destination file:
+ *
+ * ----SSSSSSS-SSSSS----SSSSSS
+ *   <------->
+ * --DDDDD--SSSSSSSDDDDD--DDD
+ *        <------->
+ *
+ * Do likewise with the second hole and extent in our range.  Holes in the
+ * unmap range don't affect our operation.
+ *
+ * ----SSSSSSS-SSSSS----SSSSSS
+ *            <---->
+ * --DDDDD--SSSSSSS-SSSSS-DDD
+ *                 <---->
+ *
+ * Finally, unmap and remap part of the third extent.  This will increase the
+ * size of the destination file.
+ *
+ * ----SSSSSSS-SSSSS----SSSSSS
+ *                  <----->
+ * --DDDDD--SSSSSSS-SSSSS----SSS
+ *                       <----->
+ *
+ * Once we update the destination file's i_size, we're done.
+ */
+
+/*
+ * Ensure the reflink bit is set in both inodes.
+ */
+STATIC int
+xfs_reflink_set_inode_flag(
+       struct xfs_inode        *src,
+       struct xfs_inode        *dest)
+{
+       struct xfs_mount        *mp = src->i_mount;
+       int                     error;
+       struct xfs_trans        *tp;
+
+       if (xfs_is_reflink_inode(src) && xfs_is_reflink_inode(dest))
+               return 0;
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       if (error)
+               goto out_error;
+
+       /* Lock both files against IO */
+       if (src->i_ino == dest->i_ino)
+               xfs_ilock(src, XFS_ILOCK_EXCL);
+       else
+               xfs_lock_two_inodes(src, dest, XFS_ILOCK_EXCL);
+
+       if (!xfs_is_reflink_inode(src)) {
+               trace_xfs_reflink_set_inode_flag(src);
+               xfs_trans_ijoin(tp, src, XFS_ILOCK_EXCL);
+               src->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
+               xfs_trans_log_inode(tp, src, XFS_ILOG_CORE);
+               xfs_ifork_init_cow(src);
+       } else
+               xfs_iunlock(src, XFS_ILOCK_EXCL);
+
+       if (src->i_ino == dest->i_ino)
+               goto commit_flags;
+
+       if (!xfs_is_reflink_inode(dest)) {
+               trace_xfs_reflink_set_inode_flag(dest);
+               xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL);
+               dest->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
+               xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
+               xfs_ifork_init_cow(dest);
+       } else
+               xfs_iunlock(dest, XFS_ILOCK_EXCL);
+
+commit_flags:
+       error = xfs_trans_commit(tp);
+       if (error)
+               goto out_error;
+       return error;
+
+out_error:
+       trace_xfs_reflink_set_inode_flag_error(dest, error, _RET_IP_);
+       return error;
+}
+
+/*
+ * Update destination inode size, if necessary.
+ */
+STATIC int
+xfs_reflink_update_dest(
+       struct xfs_inode        *dest,
+       xfs_off_t               newlen)
+{
+       struct xfs_mount        *mp = dest->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       if (newlen <= i_size_read(VFS_I(dest)))
+               return 0;
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       if (error)
+               goto out_error;
+
+       xfs_ilock(dest, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL);
+
+       trace_xfs_reflink_update_inode_size(dest, newlen);
+       i_size_write(VFS_I(dest), newlen);
+       dest->i_d.di_size = newlen;
+       xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);
+
+       error = xfs_trans_commit(tp);
+       if (error)
+               goto out_error;
+       return error;
+
+out_error:
+       trace_xfs_reflink_update_inode_size_error(dest, error, _RET_IP_);
+       return error;
+}
+
+/*
+ * Unmap a range of blocks from a file, then map other blocks into the hole.
+ * The range to unmap is (destoff : destoff + srcioff + irec->br_blockcount).
+ * The extent irec is mapped into dest at irec->br_startoff.
+ */
+STATIC int
+xfs_reflink_remap_extent(
+       struct xfs_inode        *ip,
+       struct xfs_bmbt_irec    *irec,
+       xfs_fileoff_t           destoff,
+       xfs_off_t               new_isize)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       xfs_fsblock_t           firstfsb;
+       unsigned int            resblks;
+       struct xfs_defer_ops    dfops;
+       struct xfs_bmbt_irec    uirec;
+       bool                    real_extent;
+       xfs_filblks_t           rlen;
+       xfs_filblks_t           unmap_len;
+       xfs_off_t               newlen;
+       int                     error;
+
+       unmap_len = irec->br_startoff + irec->br_blockcount - destoff;
+       trace_xfs_reflink_punch_range(ip, destoff, unmap_len);
+
+       /* Only remap normal extents. */
+       real_extent =  (irec->br_startblock != HOLESTARTBLOCK &&
+                       irec->br_startblock != DELAYSTARTBLOCK &&
+                       !ISUNWRITTEN(irec));
+
+       /* Start a rolling transaction to switch the mappings */
+       resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+       if (error)
+               goto out;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, 0);
+
+       /* If we're not just clearing space, then do we have enough quota? */
+       if (real_extent) {
+               error = xfs_trans_reserve_quota_nblks(tp, ip,
+                               irec->br_blockcount, 0, XFS_QMOPT_RES_REGBLKS);
+               if (error)
+                       goto out_cancel;
+       }
+
+       trace_xfs_reflink_remap(ip, irec->br_startoff,
+                               irec->br_blockcount, irec->br_startblock);
+
+       /* Unmap the old blocks in the data fork. */
+       rlen = unmap_len;
+       while (rlen) {
+               xfs_defer_init(&dfops, &firstfsb);
+               error = __xfs_bunmapi(tp, ip, destoff, &rlen, 0, 1,
+                               &firstfsb, &dfops);
+               if (error)
+                       goto out_defer;
+
+               /*
+                * Trim the extent to whatever got unmapped.
+                * Remember, bunmapi works backwards.
+                */
+               uirec.br_startblock = irec->br_startblock + rlen;
+               uirec.br_startoff = irec->br_startoff + rlen;
+               uirec.br_blockcount = unmap_len - rlen;
+               unmap_len = rlen;
+
+               /* If this isn't a real mapping, we're done. */
+               if (!real_extent || uirec.br_blockcount == 0)
+                       goto next_extent;
+
+               trace_xfs_reflink_remap(ip, uirec.br_startoff,
+                               uirec.br_blockcount, uirec.br_startblock);
+
+               /* Update the refcount tree */
+               error = xfs_refcount_increase_extent(mp, &dfops, &uirec);
+               if (error)
+                       goto out_defer;
+
+               /* Map the new blocks into the data fork. */
+               error = xfs_bmap_map_extent(mp, &dfops, ip, &uirec);
+               if (error)
+                       goto out_defer;
+
+               /* Update quota accounting. */
+               xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
+                               uirec.br_blockcount);
+
+               /* Update dest isize if needed. */
+               newlen = XFS_FSB_TO_B(mp,
+                               uirec.br_startoff + uirec.br_blockcount);
+               newlen = min_t(xfs_off_t, newlen, new_isize);
+               if (newlen > i_size_read(VFS_I(ip))) {
+                       trace_xfs_reflink_update_inode_size(ip, newlen);
+                       i_size_write(VFS_I(ip), newlen);
+                       ip->i_d.di_size = newlen;
+                       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+               }
+
+next_extent:
+               /* Process all the deferred stuff. */
+               error = xfs_defer_finish(&tp, &dfops, ip);
+               if (error)
+                       goto out_defer;
+       }
+
+       error = xfs_trans_commit(tp);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       if (error)
+               goto out;
+       return 0;
+
+out_defer:
+       xfs_defer_cancel(&dfops);
+out_cancel:
+       xfs_trans_cancel(tp);
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out:
+       trace_xfs_reflink_remap_extent_error(ip, error, _RET_IP_);
+       return error;
+}
+
+/*
+ * Iteratively remap one file's extents (and holes) to another's.
+ */
+STATIC int
+xfs_reflink_remap_blocks(
+       struct xfs_inode        *src,
+       xfs_fileoff_t           srcoff,
+       struct xfs_inode        *dest,
+       xfs_fileoff_t           destoff,
+       xfs_filblks_t           len,
+       xfs_off_t               new_isize)
+{
+       struct xfs_bmbt_irec    imap;
+       int                     nimaps;
+       int                     error = 0;
+       xfs_filblks_t           range_len;
+
+       /* drange = (destoff, destoff + len); srange = (srcoff, srcoff + len) */
+       while (len) {
+               trace_xfs_reflink_remap_blocks_loop(src, srcoff, len,
+                               dest, destoff);
+               /* Read extent from the source file */
+               nimaps = 1;
+               xfs_ilock(src, XFS_ILOCK_EXCL);
+               error = xfs_bmapi_read(src, srcoff, len, &imap, &nimaps, 0);
+               xfs_iunlock(src, XFS_ILOCK_EXCL);
+               if (error)
+                       goto err;
+               ASSERT(nimaps == 1);
+
+               trace_xfs_reflink_remap_imap(src, srcoff, len, XFS_IO_OVERWRITE,
+                               &imap);
+
+               /* Translate imap into the destination file. */
+               range_len = imap.br_startoff + imap.br_blockcount - srcoff;
+               imap.br_startoff += destoff - srcoff;
+
+               /* Clear dest from destoff to the end of imap and map it in. */
+               error = xfs_reflink_remap_extent(dest, &imap, destoff,
+                               new_isize);
+               if (error)
+                       goto err;
+
+               if (fatal_signal_pending(current)) {
+                       error = -EINTR;
+                       goto err;
+               }
+
+               /* Advance drange/srange */
+               srcoff += range_len;
+               destoff += range_len;
+               len -= range_len;
+       }
+
+       return 0;
+
+err:
+       trace_xfs_reflink_remap_blocks_error(dest, error, _RET_IP_);
+       return error;
+}
+
+/*
+ * Link a range of blocks from one file to another.
+ */
+int
+xfs_reflink_remap_range(
+       struct xfs_inode        *src,
+       xfs_off_t               srcoff,
+       struct xfs_inode        *dest,
+       xfs_off_t               destoff,
+       xfs_off_t               len)
+{
+       struct xfs_mount        *mp = src->i_mount;
+       xfs_fileoff_t           sfsbno, dfsbno;
+       xfs_filblks_t           fsblen;
+       int                     error;
+
+       if (!xfs_sb_version_hasreflink(&mp->m_sb))
+               return -EOPNOTSUPP;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       /* Don't reflink realtime inodes */
+       if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
+               return -EINVAL;
+
+       trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
+
+       /* Lock both files against IO */
+       if (src->i_ino == dest->i_ino) {
+               xfs_ilock(src, XFS_IOLOCK_EXCL);
+               xfs_ilock(src, XFS_MMAPLOCK_EXCL);
+       } else {
+               xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL);
+               xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
+       }
+
+       error = xfs_reflink_set_inode_flag(src, dest);
+       if (error)
+               goto out_error;
+
+       /*
+        * Invalidate the page cache so that we can clear any CoW mappings
+        * in the destination file.
+        */
+       truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
+                                  PAGE_ALIGN(destoff + len) - 1);
+
+       dfsbno = XFS_B_TO_FSBT(mp, destoff);
+       sfsbno = XFS_B_TO_FSBT(mp, srcoff);
+       fsblen = XFS_B_TO_FSB(mp, len);
+       error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
+                       destoff + len);
+       if (error)
+               goto out_error;
+
+       error = xfs_reflink_update_dest(dest, destoff + len);
+       if (error)
+               goto out_error;
+
+out_error:
+       xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
+       xfs_iunlock(src, XFS_IOLOCK_EXCL);
+       if (src->i_ino != dest->i_ino) {
+               xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
+               xfs_iunlock(dest, XFS_IOLOCK_EXCL);
+       }
+       if (error)
+               trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
+       return error;
+}
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h

index 1d2f180..c35ce29 100644 (file)
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -43,5 +43,7 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
  extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
                 xfs_off_t count);
  extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
+extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
+               struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len);
  
  #endif /* __XFS_REFLINK_H */
author	Darrick J. Wong <darrick.wong@oracle.com>
	Mon, 3 Oct 2016 16:11:40 +0000 (09:11 -0700)
committer	Darrick J. Wong <darrick.wong@oracle.com>
	Wed, 5 Oct 2016 23:26:05 +0000 (16:26 -0700)
fs/xfs/xfs_reflink.c		patch \| blob \| history
fs/xfs/xfs_reflink.h		patch \| blob \| history