xfs: add xfs_trim_extent
[cascardo/linux.git] / fs / xfs / libxfs / xfs_bmap.c
index 9d7f61d..381e765 100644 (file)
@@ -48,6 +48,7 @@
 #include "xfs_filestream.h"
 #include "xfs_rmap.h"
 #include "xfs_ag_resv.h"
+#include "xfs_refcount.h"
 
 
 kmem_zone_t            *xfs_bmap_free_item_zone;
@@ -140,7 +141,8 @@ xfs_bmbt_lookup_ge(
  */
 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
 {
-       return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+       return whichfork != XFS_COW_FORK &&     /* always false for the CoW fork */
+               XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
                XFS_IFORK_NEXTENTS(ip, whichfork) >
                        XFS_IFORK_MAXEXT(ip, whichfork);
 }
@@ -150,7 +152,8 @@ static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
  */
 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
 {
-       return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+       return whichfork != XFS_COW_FORK &&     /* always false for the CoW fork */
+               XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
                XFS_IFORK_NEXTENTS(ip, whichfork) <=
                        XFS_IFORK_MAXEXT(ip, whichfork);
 }
@@ -640,6 +643,7 @@ xfs_bmap_btree_to_extents(
 
        mp = ip->i_mount;
        ifp = XFS_IFORK_PTR(ip, whichfork);
+       ASSERT(whichfork != XFS_COW_FORK);
        ASSERT(ifp->if_flags & XFS_IFEXTENTS);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
        rblock = ifp->if_broot;
@@ -706,6 +710,7 @@ xfs_bmap_extents_to_btree(
        xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
 
        mp = ip->i_mount;
+       ASSERT(whichfork != XFS_COW_FORK);
        ifp = XFS_IFORK_PTR(ip, whichfork);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
 
@@ -748,6 +753,7 @@ xfs_bmap_extents_to_btree(
                args.type = XFS_ALLOCTYPE_START_BNO;
                args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
        } else if (dfops->dop_low) {
+try_another_ag:
                args.type = XFS_ALLOCTYPE_START_BNO;
                args.fsbno = *firstblock;
        } else {
@@ -762,6 +768,21 @@ xfs_bmap_extents_to_btree(
                xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
                return error;
        }
+
+       /*
+        * During a CoW operation, the allocation and bmbt updates occur in
+        * different transactions.  The mapping code tries to put new bmbt
+        * blocks near extents being mapped, but the only way to guarantee this
+        * is if the alloc and the mapping happen in a single transaction that
+        * has a block reservation.  That isn't the case here, so if we run out
+        * of space we'll try again with another AG.
+        */
+       if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) &&
+           args.fsbno == NULLFSBLOCK &&
+           args.type == XFS_ALLOCTYPE_NEAR_BNO) {
+               dfops->dop_low = true;
+               goto try_another_ag;
+       }
        /*
         * Allocation can't fail, the space was reserved.
         */
@@ -837,6 +858,7 @@ xfs_bmap_local_to_extents_empty(
 {
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
 
+       ASSERT(whichfork != XFS_COW_FORK);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
        ASSERT(ifp->if_bytes == 0);
        ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
@@ -896,6 +918,7 @@ xfs_bmap_local_to_extents(
         * file currently fits in an inode.
         */
        if (*firstblock == NULLFSBLOCK) {
+try_another_ag:
                args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
                args.type = XFS_ALLOCTYPE_START_BNO;
        } else {
@@ -908,6 +931,19 @@ xfs_bmap_local_to_extents(
        if (error)
                goto done;
 
+       /*
+        * During a CoW operation, the allocation and bmbt updates occur in
+        * different transactions.  The mapping code tries to put new bmbt
+        * blocks near extents being mapped, but the only way to guarantee this
+        * is if the alloc and the mapping happen in a single transaction that
+        * has a block reservation.  That isn't the case here, so if we run out
+        * of space we'll try again with another AG.
+        */
+       if (xfs_sb_version_hasreflink(&ip->i_mount->m_sb) &&
+           args.fsbno == NULLFSBLOCK &&
+           args.type == XFS_ALLOCTYPE_NEAR_BNO) {
+               goto try_another_ag;
+       }
        /* Can't fail, the space was reserved. */
        ASSERT(args.fsbno != NULLFSBLOCK);
        ASSERT(args.len == 1);
@@ -1670,7 +1706,8 @@ xfs_bmap_one_block(
  */
 STATIC int                             /* error */
 xfs_bmap_add_extent_delay_real(
-       struct xfs_bmalloca     *bma)
+       struct xfs_bmalloca     *bma,
+       int                     whichfork)
 {
        struct xfs_bmbt_irec    *new = &bma->got;
        int                     diff;   /* temp value */
@@ -1688,11 +1725,14 @@ xfs_bmap_add_extent_delay_real(
        xfs_filblks_t           temp=0; /* value for da_new calculations */
        xfs_filblks_t           temp2=0;/* value for da_new calculations */
        int                     tmp_rval;       /* partial logging flags */
-       int                     whichfork = XFS_DATA_FORK;
        struct xfs_mount        *mp;
+       xfs_extnum_t            *nextents;
 
        mp = bma->ip->i_mount;
        ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+       ASSERT(whichfork != XFS_ATTR_FORK);
+       nextents = (whichfork == XFS_COW_FORK ? &bma->ip->i_cnextents :
+                                               &bma->ip->i_d.di_nextents);
 
        ASSERT(bma->idx >= 0);
        ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
@@ -1706,6 +1746,9 @@ xfs_bmap_add_extent_delay_real(
 #define        RIGHT           r[1]
 #define        PREV            r[2]
 
+       if (whichfork == XFS_COW_FORK)
+               state |= BMAP_COWFORK;
+
        /*
         * Set up a bunch of variables to make the tests simpler.
         */
@@ -1792,7 +1835,7 @@ xfs_bmap_add_extent_delay_real(
                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
 
                xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
-               bma->ip->i_d.di_nextents--;
+               (*nextents)--;
                if (bma->cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -1894,7 +1937,7 @@ xfs_bmap_add_extent_delay_real(
                xfs_bmbt_set_startblock(ep, new->br_startblock);
                trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
 
-               bma->ip->i_d.di_nextents++;
+               (*nextents)++;
                if (bma->cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -1964,7 +2007,7 @@ xfs_bmap_add_extent_delay_real(
                temp = PREV.br_blockcount - new->br_blockcount;
                xfs_bmbt_set_blockcount(ep, temp);
                xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
-               bma->ip->i_d.di_nextents++;
+               (*nextents)++;
                if (bma->cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2048,7 +2091,7 @@ xfs_bmap_add_extent_delay_real(
                trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
                xfs_bmbt_set_blockcount(ep, temp);
                xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
-               bma->ip->i_d.di_nextents++;
+               (*nextents)++;
                if (bma->cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2117,7 +2160,7 @@ xfs_bmap_add_extent_delay_real(
                RIGHT.br_blockcount = temp2;
                /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
                xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
-               bma->ip->i_d.di_nextents++;
+               (*nextents)++;
                if (bma->cur == NULL)
                        rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
                else {
@@ -2215,7 +2258,8 @@ xfs_bmap_add_extent_delay_real(
 
        xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
 done:
-       bma->logflags |= rval;
+       if (whichfork != XFS_COW_FORK)
+               bma->logflags |= rval;
        return error;
 #undef LEFT
 #undef RIGHT
@@ -2759,6 +2803,7 @@ done:
 STATIC void
 xfs_bmap_add_extent_hole_delay(
        xfs_inode_t             *ip,    /* incore inode pointer */
+       int                     whichfork,
        xfs_extnum_t            *idx,   /* extent number to update/insert */
        xfs_bmbt_irec_t         *new)   /* new data to add to file extents */
 {
@@ -2770,8 +2815,10 @@ xfs_bmap_add_extent_hole_delay(
        int                     state;  /* state bits, accessed thru macros */
        xfs_filblks_t           temp=0; /* temp for indirect calculations */
 
-       ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       ifp = XFS_IFORK_PTR(ip, whichfork);
        state = 0;
+       if (whichfork == XFS_COW_FORK)
+               state |= BMAP_COWFORK;
        ASSERT(isnullstartblock(new->br_startblock));
 
        /*
@@ -2789,7 +2836,7 @@ xfs_bmap_add_extent_hole_delay(
         * Check and set flags if the current (right) segment exists.
         * If it doesn't exist, we're converting the hole at end-of-file.
         */
-       if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+       if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
                state |= BMAP_RIGHT_VALID;
                xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
 
@@ -2923,6 +2970,7 @@ xfs_bmap_add_extent_hole_real(
        ASSERT(!isnullstartblock(new->br_startblock));
        ASSERT(!bma->cur ||
               !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+       ASSERT(whichfork != XFS_COW_FORK);
 
        XFS_STATS_INC(mp, xs_add_exlist);
 
@@ -3648,7 +3696,9 @@ xfs_bmap_btalloc(
        else if (mp->m_dalign)
                stripe_align = mp->m_dalign;
 
-       if (xfs_alloc_is_userdata(ap->datatype))
+       if (ap->flags & XFS_BMAPI_COWFORK)
+               align = xfs_get_cowextsz_hint(ap->ip);
+       else if (xfs_alloc_is_userdata(ap->datatype))
                align = xfs_get_extsz_hint(ap->ip);
        if (unlikely(align)) {
                error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3856,7 +3906,8 @@ xfs_bmap_btalloc(
                ASSERT(nullfb || fb_agno == args.agno ||
                       (ap->dfops->dop_low && fb_agno < args.agno));
                ap->length = args.len;
-               ap->ip->i_d.di_nblocks += args.len;
+               if (!(ap->flags & XFS_BMAPI_COWFORK))
+                       ap->ip->i_d.di_nblocks += args.len;
                xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
                if (ap->wasdel)
                        ap->ip->i_delayed_blks -= args.len;
@@ -3875,6 +3926,60 @@ xfs_bmap_btalloc(
        return 0;
 }
 
+/*
+ * For a remap operation, just "allocate" an extent at the address that the
+ * caller passed in, and ensure that the AGFL is the right size.  The caller
+ * will then map the "allocated" extent into the file somewhere.
+ *
+ * The address is taken from *ap->firstblock and the length from ap->length.
+ * On the non-corrupt path ap->blkno is set to that address, ap->length is
+ * added to the inode's di_nblocks and the inode core is logged.
+ *
+ * Returns -EFSCORRUPTED if the address decodes to an AG number or AG block
+ * beyond the superblock geometry, otherwise whatever
+ * xfs_alloc_fix_freelist() returns.  Note that di_nblocks has already been
+ * updated by the time the freelist is fixed.
+ */
+STATIC int
+xfs_bmap_remap_alloc(
+       struct xfs_bmalloca     *ap)
+{
+       struct xfs_trans        *tp = ap->tp;
+       struct xfs_mount        *mp = tp->t_mountp;
+       xfs_fsblock_t           bno;    /* fs block number, not an AG block */
+       struct xfs_alloc_arg    args;
+       int                     error;
+
+       /*
+        * validate that the block number is legal - this enables us to detect
+        * and handle a silent filesystem corruption rather than crashing.
+        */
+       memset(&args, 0, sizeof(struct xfs_alloc_arg));
+       args.tp = ap->tp;
+       args.mp = ap->tp->t_mountp;
+       bno = *ap->firstblock;
+       args.agno = XFS_FSB_TO_AGNO(mp, bno);
+       args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
+       if (args.agno >= mp->m_sb.sb_agcount ||
+           args.agbno >= mp->m_sb.sb_agblocks)
+               return -EFSCORRUPTED;
+
+       /* "Allocate" the extent from the range we passed in. */
+       trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
+       ap->blkno = bno;
+       ap->ip->i_d.di_nblocks += ap->length;
+       xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
+
+       /* Fix the freelist, like a real allocator does. */
+       args.datatype = ap->datatype;
+       args.pag = xfs_perag_get(args.mp, args.agno);
+       ASSERT(args.pag);
+
+       /*
+        * The freelist fixing code will decline the allocation if
+        * the size and shape of the free space doesn't allow for
+        * allocating the extent and updating all the metadata that
+        * happens during an allocation.  We're remapping, not
+        * allocating, so skip that check by pretending to be freeing.
+        */
+       error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
+       xfs_perag_put(args.pag);
+       if (error)
+               trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
+       return error;
+}
+
 /*
  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
  * It figures out where to ask the underlying allocator to put the new extent.
@@ -3883,12 +3988,47 @@ STATIC int
 xfs_bmap_alloc(
        struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
 {
+       if (ap->flags & XFS_BMAPI_REMAP)
+               return xfs_bmap_remap_alloc(ap);
        if (XFS_IS_REALTIME_INODE(ap->ip) &&
            xfs_alloc_is_userdata(ap->datatype))
                return xfs_bmap_rtalloc(ap);
        return xfs_bmap_btalloc(ap);
 }
 
+/*
+ * Trim extent to fit a logical block range.
+ *
+ * @irec is modified in place so that it covers only the part of the extent
+ * that overlaps the file block range [bno, bno + len).  If there is no
+ * overlap at all, br_blockcount is set to zero and nothing else is touched.
+ *
+ * When blocks are chopped off the front, br_startoff moves forward and
+ * br_startblock is advanced by the same amount, except that DELAYSTARTBLOCK
+ * and HOLESTARTBLOCK markers are left alone; any br_startblock for which
+ * isnullstartblock() is true is first replaced with DELAYSTARTBLOCK.
+ * Chopping blocks off the end only reduces br_blockcount.
+ */
+void
+xfs_trim_extent(
+       struct xfs_bmbt_irec    *irec,
+       xfs_fileoff_t           bno,
+       xfs_filblks_t           len)
+{
+       xfs_fileoff_t           distance;
+       xfs_fileoff_t           end = bno + len;
+
+       if (irec->br_startoff + irec->br_blockcount <= bno ||
+           irec->br_startoff >= end) {
+               irec->br_blockcount = 0;
+               return;
+       }
+
+       if (irec->br_startoff < bno) {  /* extent starts before the range */
+               distance = bno - irec->br_startoff;
+               if (isnullstartblock(irec->br_startblock))
+                       irec->br_startblock = DELAYSTARTBLOCK;
+               if (irec->br_startblock != DELAYSTARTBLOCK &&
+                   irec->br_startblock != HOLESTARTBLOCK)
+                       irec->br_startblock += distance;
+               irec->br_startoff += distance;
+               irec->br_blockcount -= distance;
+       }
+
+       if (end < irec->br_startoff + irec->br_blockcount) {    /* runs past the range */
+               distance = irec->br_startoff + irec->br_blockcount - end;
+               irec->br_blockcount -= distance;
+       }
+}
+
 /*
  * Trim the returned map to the required bounds
  */
@@ -4012,12 +4152,11 @@ xfs_bmapi_read(
        int                     error;
        int                     eof;
        int                     n = 0;
-       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-                                               XFS_ATTR_FORK : XFS_DATA_FORK;
+       int                     whichfork = xfs_bmapi_whichfork(flags);
 
        ASSERT(*nmap >= 1);
        ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
-                          XFS_BMAPI_IGSTATE)));
+                          XFS_BMAPI_IGSTATE|XFS_BMAPI_COWFORK)));
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
 
        if (unlikely(XFS_TEST_ERROR(
@@ -4035,6 +4174,16 @@ xfs_bmapi_read(
 
        ifp = XFS_IFORK_PTR(ip, whichfork);
 
+       /* No CoW fork?  Return a hole. */
+       if (whichfork == XFS_COW_FORK && !ifp) {
+               mval->br_startoff = bno;
+               mval->br_startblock = HOLESTARTBLOCK;
+               mval->br_blockcount = len;
+               mval->br_state = XFS_EXT_NORM;
+               *nmap = 1;
+               return 0;
+       }
+
        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
                error = xfs_iread_extents(NULL, ip, whichfork);
                if (error)
@@ -4084,6 +4233,7 @@ xfs_bmapi_read(
 int
 xfs_bmapi_reserve_delalloc(
        struct xfs_inode        *ip,
+       int                     whichfork,
        xfs_fileoff_t           aoff,
        xfs_filblks_t           len,
        struct xfs_bmbt_irec    *got,
@@ -4092,7 +4242,7 @@ xfs_bmapi_reserve_delalloc(
        int                     eof)
 {
        struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
        xfs_extlen_t            alen;
        xfs_extlen_t            indlen;
        char                    rt = XFS_IS_REALTIME_INODE(ip);
@@ -4104,7 +4254,10 @@ xfs_bmapi_reserve_delalloc(
                alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
 
        /* Figure out the extent size, adjust alen */
-       extsz = xfs_get_extsz_hint(ip);
+       if (whichfork == XFS_COW_FORK)
+               extsz = xfs_get_cowextsz_hint(ip);
+       else
+               extsz = xfs_get_extsz_hint(ip);
        if (extsz) {
                error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
                                               1, 0, &aoff, &alen);
@@ -4151,7 +4304,7 @@ xfs_bmapi_reserve_delalloc(
        got->br_startblock = nullstartblock(indlen);
        got->br_blockcount = alen;
        got->br_state = XFS_EXT_NORM;
-       xfs_bmap_add_extent_hole_delay(ip, lastx, got);
+       xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got);
 
        /*
         * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
@@ -4182,8 +4335,7 @@ xfs_bmapi_allocate(
        struct xfs_bmalloca     *bma)
 {
        struct xfs_mount        *mp = bma->ip->i_mount;
-       int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
-                                               XFS_ATTR_FORK : XFS_DATA_FORK;
+       int                     whichfork = xfs_bmapi_whichfork(bma->flags);
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
        int                     tmp_logflags = 0;
        int                     error;
@@ -4278,7 +4430,7 @@ xfs_bmapi_allocate(
                bma->got.br_state = XFS_EXT_UNWRITTEN;
 
        if (bma->wasdel)
-               error = xfs_bmap_add_extent_delay_real(bma);
+               error = xfs_bmap_add_extent_delay_real(bma, whichfork);
        else
                error = xfs_bmap_add_extent_hole_real(bma, whichfork);
 
@@ -4308,8 +4460,7 @@ xfs_bmapi_convert_unwritten(
        xfs_filblks_t           len,
        int                     flags)
 {
-       int                     whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-                                               XFS_ATTR_FORK : XFS_DATA_FORK;
+       int                     whichfork = xfs_bmapi_whichfork(flags);
        struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
        int                     tmp_logflags = 0;
        int                     error;
@@ -4325,6 +4476,8 @@ xfs_bmapi_convert_unwritten(
                        (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
                return 0;
 
+       ASSERT(whichfork != XFS_COW_FORK);
+
        /*
         * Modify (by adding) the state flag, if writing.
         */
@@ -4431,8 +4584,7 @@ xfs_bmapi_write(
        orig_mval = mval;
        orig_nmap = *nmap;
 #endif
-       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-               XFS_ATTR_FORK : XFS_DATA_FORK;
+       whichfork = xfs_bmapi_whichfork(flags);
 
        ASSERT(*nmap >= 1);
        ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
@@ -4441,6 +4593,11 @@ xfs_bmapi_write(
        ASSERT(len > 0);
        ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
+       ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
+       ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
+       ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
+       ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
 
        /* zeroing is for currently only for data extents, not metadata */
        ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -4501,6 +4658,14 @@ xfs_bmapi_write(
                inhole = eof || bma.got.br_startoff > bno;
                wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
 
+               /*
+                * Make sure we only reflink into a hole.
+                */
+               if (flags & XFS_BMAPI_REMAP)
+                       ASSERT(inhole);
+               if (flags & XFS_BMAPI_COWFORK)
+                       ASSERT(!inhole);
+
                /*
                 * First, deal with the hole before the allocated space
                 * that we found, if any.
@@ -4531,6 +4696,17 @@ xfs_bmapi_write(
                                goto error0;
                        if (bma.blkno == NULLFSBLOCK)
                                break;
+
+                       /*
+                        * If this is a CoW allocation, record the data in
+                        * the refcount btree for orphan recovery.
+                        */
+                       if (whichfork == XFS_COW_FORK) {
+                               error = xfs_refcount_alloc_cow_extent(mp, dfops,
+                                               bma.blkno, bma.length);
+                               if (error)
+                                       goto error0;
+                       }
                }
 
                /* Deal with the allocated space we found.  */
@@ -4696,7 +4872,8 @@ xfs_bmap_del_extent(
        xfs_btree_cur_t         *cur,   /* if null, not a btree */
        xfs_bmbt_irec_t         *del,   /* data to remove from extents */
        int                     *logflagsp, /* inode logging flags */
-       int                     whichfork) /* data or attr fork */
+       int                     whichfork, /* data or attr fork */
+       int                     bflags) /* bmapi flags */
 {
        xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
        xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
@@ -4725,6 +4902,8 @@ xfs_bmap_del_extent(
 
        if (whichfork == XFS_ATTR_FORK)
                state |= BMAP_ATTRFORK;
+       else if (whichfork == XFS_COW_FORK)
+               state |= BMAP_COWFORK;
 
        ifp = XFS_IFORK_PTR(ip, whichfork);
        ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
@@ -4805,6 +4984,7 @@ xfs_bmap_del_extent(
                /*
                 * Matches the whole extent.  Delete the entry.
                 */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
                xfs_iext_remove(ip, *idx, 1,
                                whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
                --*idx;
@@ -4988,9 +5168,16 @@ xfs_bmap_del_extent(
        /*
         * If we need to, add to list of extents to delete.
         */
-       if (do_fx)
-               xfs_bmap_add_free(mp, dfops, del->br_startblock,
-                               del->br_blockcount, NULL);
+       if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
+               if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
+                       error = xfs_refcount_decrease_extent(mp, dfops, del);
+                       if (error)
+                               goto done;
+               } else
+                       xfs_bmap_add_free(mp, dfops, del->br_startblock,
+                                       del->br_blockcount, NULL);
+       }
+
        /*
         * Adjust inode # blocks in the file.
         */
@@ -4999,7 +5186,7 @@ xfs_bmap_del_extent(
        /*
         * Adjust quota data.
         */
-       if (qfield)
+       if (qfield && !(bflags & XFS_BMAPI_REMAP))
                xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
 
        /*
@@ -5014,6 +5201,175 @@ done:
        return error;
 }
 
+/*
+ * Remove an extent from the CoW fork.  Similar to xfs_bmap_del_extent.
+ *
+ * Looks up the CoW fork extent at del->br_startoff and removes the range
+ * described by @del from the fork's extent list.  @del must lie entirely
+ * within that extent and must agree with it on being delayed or real (both
+ * asserted).  Depending on how @del lines up with the extent, the record is
+ * deleted, shortened at either end, or split in two.
+ *
+ * For a delayed allocation extent the indirect block reservation of the
+ * remaining piece(s) is recomputed, never exceeding the old reservation, and
+ * any surplus is handed back through xfs_mod_fdblocks().
+ *
+ * As written this always returns 0: "error" is never set to anything else.
+ */
+int
+xfs_bunmapi_cow(
+       struct xfs_inode                *ip,
+       struct xfs_bmbt_irec            *del)
+{
+       xfs_filblks_t                   da_new;
+       xfs_filblks_t                   da_old;
+       xfs_fsblock_t                   del_endblock = 0;
+       xfs_fileoff_t                   del_endoff;
+       int                             delay;
+       struct xfs_bmbt_rec_host        *ep;
+       int                             error;
+       struct xfs_bmbt_irec            got;
+       xfs_fileoff_t                   got_endoff;
+       struct xfs_ifork                *ifp;
+       struct xfs_mount                *mp;
+       xfs_filblks_t                   nblks;
+       struct xfs_bmbt_irec            new;
+       /* REFERENCED */
+       uint                            qfield;
+       xfs_filblks_t                   temp;
+       xfs_filblks_t                   temp2;
+       int                             state = BMAP_COWFORK;
+       int                             eof;
+       xfs_extnum_t                    eidx;
+
+       mp = ip->i_mount;
+       XFS_STATS_INC(mp, xs_del_exlist);
+
+       ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof,
+                       &eidx, &got, &new);
+
+       ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       ASSERT((eidx >= 0) && (eidx < ifp->if_bytes /
+               (uint)sizeof(xfs_bmbt_rec_t)));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got.br_startoff <= del->br_startoff);
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got.br_startoff + got.br_blockcount;
+       ASSERT(got_endoff >= del_endoff);
+       delay = isnullstartblock(got.br_startblock);
+       ASSERT(isnullstartblock(del->br_startblock) == delay);
+       qfield = 0;
+       error = 0;
+       /*
+        * Real allocation: remember the block count and where the deleted
+        * range ends on disk.  NOTE(review): nothing in this function frees
+        * the blocks themselves; presumably the caller does - confirm.
+        */
+       if (!delay) {
+               nblks = del->br_blockcount;
+               qfield = XFS_TRANS_DQ_BCOUNT;
+               /*
+                * Set up del_endblock for later.
+                */
+               del_endblock = del->br_startblock + del->br_blockcount;
+               da_old = da_new = 0;
+       } else {
+               da_old = startblockval(got.br_startblock);
+               da_new = 0;
+               nblks = 0;
+       }
+       qfield = qfield;        /* self-assignments: neither value is used */
+       nblks = nblks;          /* again; presumably silences a compiler warning */
+
+       /*
+        * Set flag value to use in switch statement.
+        * Left-contig is 2, right-contig is 1.
+        */
+       switch (((got.br_startoff == del->br_startoff) << 1) |
+               (got_endoff == del_endoff)) {
+       case 3:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                */
+               xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK);
+               --eidx;
+               break;
+
+       case 2:
+               /*
+                * Deleting the first part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
+               xfs_bmbt_set_startoff(ep, del_endoff);
+               temp = got.br_blockcount - del->br_blockcount;
+               xfs_bmbt_set_blockcount(ep, temp);
+               if (delay) {
+                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                               da_old);
+                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+                       trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
+                       da_new = temp;
+                       break;
+               }
+               xfs_bmbt_set_startblock(ep, del_endblock);
+               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
+               break;
+
+       case 1:
+               /*
+                * Deleting the last part of the extent.
+                */
+               temp = got.br_blockcount - del->br_blockcount;
+               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);
+               if (delay) {
+                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+                               da_old);
+                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+                       trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
+                       da_new = temp;
+                       break;
+               }
+               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
+               break;
+
+       case 0:
+               /*
+                * Deleting the middle of the extent.  The existing record
+                * becomes the left piece; "new" is built as the right piece.
+                */
+               temp = del->br_startoff - got.br_startoff;
+               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
+               xfs_bmbt_set_blockcount(ep, temp);
+               new.br_startoff = del_endoff;
+               temp2 = got_endoff - del_endoff;
+               new.br_blockcount = temp2;
+               new.br_state = got.br_state;
+               if (!delay) {
+                       new.br_startblock = del_endblock;
+               } else {
+                       temp = xfs_bmap_worst_indlen(ip, temp);
+                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
+                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
+                       new.br_startblock = nullstartblock((int)temp2);
+                       da_new = temp + temp2;
+                       /* shave both reservations in turn until they fit in da_old */
+                       while (da_new > da_old) {
+                               if (temp) {
+                                       temp--;
+                                       da_new--;
+                                       xfs_bmbt_set_startblock(ep,
+                                               nullstartblock((int)temp));
+                               }
+                               if (da_new == da_old)
+                                       break;
+                               if (temp2) {
+                                       temp2--;
+                                       da_new--;
+                                       new.br_startblock =
+                                               nullstartblock((int)temp2);
+                               }
+                       }
+               }
+               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
+               xfs_iext_insert(ip, eidx + 1, 1, &new, state);
+               ++eidx;
+               break;
+       }
+
+       /*
+        * Account for change in delayed indirect blocks.
+        * Nothing to do for disk quota accounting here.
+        */
+       ASSERT(da_old >= da_new);
+       if (da_old > da_new)
+               xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
+
+       return error;
+}
+
 /*
  * Unmap (remove) blocks from a file.
  * If nexts is nonzero then the number of extents to remove is limited to
@@ -5021,17 +5377,16 @@ done:
  * *done is set.
  */
 int                                            /* error */
-xfs_bunmapi(
+__xfs_bunmapi(
        xfs_trans_t             *tp,            /* transaction pointer */
        struct xfs_inode        *ip,            /* incore inode */
        xfs_fileoff_t           bno,            /* starting offset to unmap */
-       xfs_filblks_t           len,            /* length to unmap in file */
+       xfs_filblks_t           *rlen,          /* i/o: amount remaining */
        int                     flags,          /* misc flags */
        xfs_extnum_t            nexts,          /* number of extents max */
        xfs_fsblock_t           *firstblock,    /* first allocated block
                                                   controls a.g. for allocs */
-       struct xfs_defer_ops    *dfops,         /* i/o: list extents to free */
-       int                     *done)          /* set if not done yet */
+       struct xfs_defer_ops    *dfops)         /* i/o: deferred updates */
 {
        xfs_btree_cur_t         *cur;           /* bmap btree cursor */
        xfs_bmbt_irec_t         del;            /* extent being deleted */
@@ -5053,11 +5408,12 @@ xfs_bunmapi(
        int                     wasdel;         /* was a delayed alloc extent */
        int                     whichfork;      /* data or attribute fork */
        xfs_fsblock_t           sum;
+       xfs_filblks_t           len = *rlen;    /* length to unmap in file */
 
        trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
 
-       whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
-               XFS_ATTR_FORK : XFS_DATA_FORK;
+       whichfork = xfs_bmapi_whichfork(flags);
+       ASSERT(whichfork != XFS_COW_FORK);
        ifp = XFS_IFORK_PTR(ip, whichfork);
        if (unlikely(
            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5079,7 +5435,7 @@ xfs_bunmapi(
                return error;
        nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
        if (nextents == 0) {
-               *done = 1;
+               *rlen = 0;
                return 0;
        }
        XFS_STATS_INC(mp, xs_blk_unmap);
@@ -5324,7 +5680,7 @@ xfs_bunmapi(
                        cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
 
                error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
-                               &tmp_logflags, whichfork);
+                               &tmp_logflags, whichfork, flags);
                logflags |= tmp_logflags;
                if (error)
                        goto error0;
@@ -5350,7 +5706,10 @@ nodelete:
                        extno++;
                }
        }
-       *done = bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0;
+       if (bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0)
+               *rlen = 0;
+       else
+               *rlen = bno - start + 1;
 
        /*
         * Convert to a btree if necessary.
@@ -5406,6 +5765,27 @@ error0:
        return error;
 }
 
+/*
+ * Unmap a range of a file.
+ *
+ * Wrapper around __xfs_bunmapi() for callers that want a completion flag
+ * rather than a remaining length: the residual length reported back by
+ * __xfs_bunmapi() is collapsed into *done, which is set to 1 when nothing
+ * remains and to 0 otherwise.  *done is written whether or not an error is
+ * returned; the return value is that of __xfs_bunmapi().
+ */
+int
+xfs_bunmapi(
+       xfs_trans_t             *tp,
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           bno,
+       xfs_filblks_t           len,
+       int                     flags,
+       xfs_extnum_t            nexts,
+       xfs_fsblock_t           *firstblock,
+       struct xfs_defer_ops    *dfops,
+       int                     *done)
+{
+       xfs_filblks_t           remaining = len;
+       int                     error;
+
+       error = __xfs_bunmapi(tp, ip, bno, &remaining, flags, nexts,
+                       firstblock, dfops);
+       *done = !remaining;
+       return error;
+}
+
 /*
  * Determine whether an extent shift can be accomplished by a merge with the
  * extent that precedes the target hole of the shift.
@@ -5985,3 +6365,146 @@ out:
        xfs_trans_cancel(tp);
        return error;
 }
+
+/*
+ * Deferred mapping is only for real extents in the data fork.
+ *
+ * Returns false when the mapping's start block is HOLESTARTBLOCK or
+ * DELAYSTARTBLOCK, true for any other start block.
+ */
+static bool
+xfs_bmap_is_update_needed(
+       struct xfs_bmbt_irec    *bmap)
+{
+       if (bmap->br_startblock == HOLESTARTBLOCK)
+               return false;
+       if (bmap->br_startblock == DELAYSTARTBLOCK)
+               return false;
+
+       return true;
+}
+
+/*
+ * Record a bmap intent.
+ *
+ * Emits the xfs_bmap_defer tracepoint, allocates an xfs_bmap_intent holding
+ * a copy of @bmap together with @type, @ip and @whichfork, joins @ip to
+ * @dfops and then queues the intent as an XFS_DEFER_OPS_TYPE_BMAP item.  If
+ * the join fails the intent is freed again and that error is returned;
+ * otherwise returns 0.
+ */
+static int
+__xfs_bmap_add(
+       struct xfs_mount                *mp,
+       struct xfs_defer_ops            *dfops,
+       enum xfs_bmap_intent_type       type,
+       struct xfs_inode                *ip,
+       int                             whichfork,
+       struct xfs_bmbt_irec            *bmap)
+{
+       struct xfs_bmap_intent          *intent;
+       int                             error;
+
+       trace_xfs_bmap_defer(mp,
+                       XFS_FSB_TO_AGNO(mp, bmap->br_startblock), type,
+                       XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
+                       ip->i_ino, whichfork, bmap->br_startoff,
+                       bmap->br_blockcount, bmap->br_state);
+
+       intent = kmem_alloc(sizeof(*intent), KM_SLEEP | KM_NOFS);
+       INIT_LIST_HEAD(&intent->bi_list);
+       intent->bi_bmap = *bmap;
+       intent->bi_whichfork = whichfork;
+       intent->bi_owner = ip;
+       intent->bi_type = type;
+
+       error = xfs_defer_join(dfops, ip);
+       if (!error) {
+               xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP,
+                               &intent->bi_list);
+               return 0;
+       }
+
+       /* Joining the inode failed, so the intent was never queued. */
+       kmem_free(intent);
+       return error;
+}
+
+/*
+ * Map an extent into a file.
+ *
+ * Queues an XFS_BMAP_MAP intent against the data fork of @ip for the mapping
+ * in @PREV.  Mappings rejected by xfs_bmap_is_update_needed() are skipped:
+ * nothing is queued and 0 is returned.
+ */
+int
+xfs_bmap_map_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       struct xfs_inode        *ip,
+       struct xfs_bmbt_irec    *PREV)
+{
+       if (xfs_bmap_is_update_needed(PREV))
+               return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip,
+                               XFS_DATA_FORK, PREV);
+
+       return 0;
+}
+
+/*
+ * Unmap an extent out of a file.
+ *
+ * Queues an XFS_BMAP_UNMAP intent against the data fork of @ip for the
+ * mapping in @PREV.  Mappings rejected by xfs_bmap_is_update_needed() are
+ * skipped: nothing is queued and 0 is returned.
+ */
+int
+xfs_bmap_unmap_extent(
+       struct xfs_mount        *mp,
+       struct xfs_defer_ops    *dfops,
+       struct xfs_inode        *ip,
+       struct xfs_bmbt_irec    *PREV)
+{
+       if (xfs_bmap_is_update_needed(PREV))
+               return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip,
+                               XFS_DATA_FORK, PREV);
+
+       return 0;
+}
+
+/*
+ * Process one of the deferred bmap operations.
+ *
+ * Rebuilds a mapping from @startoff, @startblock, @blockcount and @state
+ * and, depending on @type, either maps it into (XFS_BMAP_MAP) or unmaps it
+ * from (XFS_BMAP_UNMAP) the @whichfork fork of @ip.  XFS_BMAPI_REMAP is
+ * always passed to the mapping call, plus XFS_BMAPI_ATTRFORK for the attr
+ * fork.
+ *
+ * Returns 0 on success, -EFSCORRUPTED if @whichfork is neither the data nor
+ * the attr fork or if @type is not a known intent type, -EIO if the
+ * XFS_ERRTAG_BMAP_FINISH_ONE XFS_TEST_ERROR check triggers, or the error
+ * returned by xfs_bmapi_write() / xfs_bunmapi().
+ */
+int
+xfs_bmap_finish_one(
+       struct xfs_trans                *tp,
+       struct xfs_defer_ops            *dfops,
+       struct xfs_inode                *ip,
+       enum xfs_bmap_intent_type       type,
+       int                             whichfork,
+       xfs_fileoff_t                   startoff,
+       xfs_fsblock_t                   startblock,
+       xfs_filblks_t                   blockcount,
+       xfs_exntst_t                    state)
+{
+       struct xfs_bmbt_irec            bmap;
+       int                             nimaps = 1;
+       xfs_fsblock_t                   firstfsb;
+       int                             flags = XFS_BMAPI_REMAP;
+       int                             done;
+       int                             error = 0;
+
+       bmap.br_startblock = startblock;
+       bmap.br_startoff = startoff;
+       bmap.br_blockcount = blockcount;
+       bmap.br_state = state;
+
+       trace_xfs_bmap_deferred(tp->t_mountp,
+                       XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
+                       XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
+                       ip->i_ino, whichfork, startoff, blockcount, state);
+
+       if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK)
+               return -EFSCORRUPTED;
+       if (whichfork == XFS_ATTR_FORK)
+               flags |= XFS_BMAPI_ATTRFORK;
+
+       if (XFS_TEST_ERROR(false, tp->t_mountp,
+                       XFS_ERRTAG_BMAP_FINISH_ONE,
+                       XFS_RANDOM_BMAP_FINISH_ONE))
+               return -EIO;
+
+       switch (type) {
+       case XFS_BMAP_MAP:
+               firstfsb = bmap.br_startblock;
+               error = xfs_bmapi_write(tp, ip, bmap.br_startoff,
+                                       bmap.br_blockcount, flags, &firstfsb,
+                                       bmap.br_blockcount, &bmap, &nimaps,
+                                       dfops);
+               break;
+       case XFS_BMAP_UNMAP:
+               /*
+                * xfs_bunmapi() takes firstfsb by address, so it must not be
+                * handed over uninitialised.
+                * NOTE(review): NULLFSBLOCK is presumed to be the "no first
+                * block chosen yet" value expected here -- confirm.
+                */
+               firstfsb = NULLFSBLOCK;
+               error = xfs_bunmapi(tp, ip, bmap.br_startoff,
+                               bmap.br_blockcount, flags, 1, &firstfsb,
+                               dfops, &done);
+               /* Completion is only expected when the unmap succeeded. */
+               ASSERT(error || done);
+               break;
+       default:
+               ASSERT(0);
+               error = -EFSCORRUPTED;
+               break;
+       }
+
+       return error;
+}