xfs: return work remaining at the end of a bunmapi operation
[cascardo/linux.git] / fs / xfs / libxfs / xfs_bmap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_sb.h"
26 #include "xfs_mount.h"
27 #include "xfs_defer.h"
28 #include "xfs_da_format.h"
29 #include "xfs_da_btree.h"
30 #include "xfs_dir2.h"
31 #include "xfs_inode.h"
32 #include "xfs_btree.h"
33 #include "xfs_trans.h"
34 #include "xfs_inode_item.h"
35 #include "xfs_extfree_item.h"
36 #include "xfs_alloc.h"
37 #include "xfs_bmap.h"
38 #include "xfs_bmap_util.h"
39 #include "xfs_bmap_btree.h"
40 #include "xfs_rtalloc.h"
41 #include "xfs_error.h"
42 #include "xfs_quota.h"
43 #include "xfs_trans_space.h"
44 #include "xfs_buf_item.h"
45 #include "xfs_trace.h"
46 #include "xfs_symlink.h"
47 #include "xfs_attr_leaf.h"
48 #include "xfs_filestream.h"
49 #include "xfs_rmap.h"
50 #include "xfs_ag_resv.h"
51 #include "xfs_refcount.h"
52
53
/* Slab cache backing struct xfs_extent_free_item allocations (see xfs_bmap_add_free). */
kmem_zone_t		*xfs_bmap_free_item_zone;
55
56 /*
57  * Miscellaneous helper functions
58  */
59
60 /*
61  * Compute and fill in the value of the maximum depth of a bmap btree
62  * in this filesystem.  Done once, during mount.
63  */
64 void
65 xfs_bmap_compute_maxlevels(
66         xfs_mount_t     *mp,            /* file system mount structure */
67         int             whichfork)      /* data or attr fork */
68 {
69         int             level;          /* btree level */
70         uint            maxblocks;      /* max blocks at this level */
71         uint            maxleafents;    /* max leaf entries possible */
72         int             maxrootrecs;    /* max records in root block */
73         int             minleafrecs;    /* min records in leaf block */
74         int             minnoderecs;    /* min records in node block */
75         int             sz;             /* root block size */
76
77         /*
78          * The maximum number of extents in a file, hence the maximum
79          * number of leaf entries, is controlled by the type of di_nextents
80          * (a signed 32-bit number, xfs_extnum_t), or by di_anextents
81          * (a signed 16-bit number, xfs_aextnum_t).
82          *
83          * Note that we can no longer assume that if we are in ATTR1 that
84          * the fork offset of all the inodes will be
85          * (xfs_default_attroffset(ip) >> 3) because we could have mounted
86          * with ATTR2 and then mounted back with ATTR1, keeping the
87          * di_forkoff's fixed but probably at various positions. Therefore,
88          * for both ATTR1 and ATTR2 we have to assume the worst case scenario
89          * of a minimum size available.
90          */
91         if (whichfork == XFS_DATA_FORK) {
92                 maxleafents = MAXEXTNUM;
93                 sz = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
94         } else {
95                 maxleafents = MAXAEXTNUM;
96                 sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);
97         }
98         maxrootrecs = xfs_bmdr_maxrecs(sz, 0);
99         minleafrecs = mp->m_bmap_dmnr[0];
100         minnoderecs = mp->m_bmap_dmnr[1];
101         maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
102         for (level = 1; maxblocks > 1; level++) {
103                 if (maxblocks <= maxrootrecs)
104                         maxblocks = 1;
105                 else
106                         maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
107         }
108         mp->m_bm_maxlevels[whichfork] = level;
109 }
110
111 STATIC int                              /* error */
112 xfs_bmbt_lookup_eq(
113         struct xfs_btree_cur    *cur,
114         xfs_fileoff_t           off,
115         xfs_fsblock_t           bno,
116         xfs_filblks_t           len,
117         int                     *stat)  /* success/failure */
118 {
119         cur->bc_rec.b.br_startoff = off;
120         cur->bc_rec.b.br_startblock = bno;
121         cur->bc_rec.b.br_blockcount = len;
122         return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
123 }
124
125 STATIC int                              /* error */
126 xfs_bmbt_lookup_ge(
127         struct xfs_btree_cur    *cur,
128         xfs_fileoff_t           off,
129         xfs_fsblock_t           bno,
130         xfs_filblks_t           len,
131         int                     *stat)  /* success/failure */
132 {
133         cur->bc_rec.b.br_startoff = off;
134         cur->bc_rec.b.br_startblock = bno;
135         cur->bc_rec.b.br_blockcount = len;
136         return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
137 }
138
139 /*
140  * Check if the inode needs to be converted to btree format.
141  */
142 static inline bool xfs_bmap_needs_btree(struct xfs_inode *ip, int whichfork)
143 {
144         return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
145                 XFS_IFORK_NEXTENTS(ip, whichfork) >
146                         XFS_IFORK_MAXEXT(ip, whichfork);
147 }
148
149 /*
150  * Check if the inode should be converted to extent format.
151  */
152 static inline bool xfs_bmap_wants_extents(struct xfs_inode *ip, int whichfork)
153 {
154         return XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
155                 XFS_IFORK_NEXTENTS(ip, whichfork) <=
156                         XFS_IFORK_MAXEXT(ip, whichfork);
157 }
158
/*
 * Update the record referred to by cur to the value given
 * by [off, bno, len, state].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int
xfs_bmbt_update(
	struct xfs_btree_cur	*cur,	/* cursor positioned at the record */
	xfs_fileoff_t		off,	/* new starting file offset */
	xfs_fsblock_t		bno,	/* new starting block number */
	xfs_filblks_t		len,	/* new length in filesystem blocks */
	xfs_exntst_t		state)	/* new extent state */
{
	union xfs_btree_rec	rec;

	/* Pack the incore values into on-disk record form before updating. */
	xfs_bmbt_disk_set_allf(&rec.bmbt, off, bno, len, state);
	return xfs_btree_update(cur, &rec);
}
177
/*
 * Compute the worst-case number of indirect blocks that will be used
 * for ip's delayed extent of length "len".
 */
STATIC xfs_filblks_t
xfs_bmap_worst_indlen(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_filblks_t	len)		/* delayed extent length */
{
	int		level;		/* btree level number */
	int		maxrecs;	/* maximum record count at this level */
	xfs_mount_t	*mp;		/* mount structure */
	xfs_filblks_t	rval;		/* return value */

	mp = ip->i_mount;
	maxrecs = mp->m_bmap_dmxr[0];
	for (level = 0, rval = 0;
	     level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK);
	     level++) {
		/*
		 * Round up, then divide: do_div() modifies len in place, so
		 * after this len holds the block count at the current level.
		 */
		len += maxrecs - 1;
		do_div(len, maxrecs);
		rval += len;
		if (len == 1)
			/* One block here implies one block at each level above. */
			return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) -
				level - 1;
		if (level == 0)
			/* Interior nodes hold a different max record count. */
			maxrecs = mp->m_bmap_dmxr[1];
	}
	return rval;
}
208
209 /*
210  * Calculate the default attribute fork offset for newly created inodes.
211  */
212 uint
213 xfs_default_attroffset(
214         struct xfs_inode        *ip)
215 {
216         struct xfs_mount        *mp = ip->i_mount;
217         uint                    offset;
218
219         if (mp->m_sb.sb_inodesize == 256) {
220                 offset = XFS_LITINO(mp, ip->i_d.di_version) -
221                                 XFS_BMDR_SPACE_CALC(MINABTPTRS);
222         } else {
223                 offset = XFS_BMDR_SPACE_CALC(6 * MINABTPTRS);
224         }
225
226         ASSERT(offset < XFS_LITINO(mp, ip->i_d.di_version));
227         return offset;
228 }
229
230 /*
231  * Helper routine to reset inode di_forkoff field when switching
232  * attribute fork from local to extent format - we reset it where
233  * possible to make space available for inline data fork extents.
234  */
235 STATIC void
236 xfs_bmap_forkoff_reset(
237         xfs_inode_t     *ip,
238         int             whichfork)
239 {
240         if (whichfork == XFS_ATTR_FORK &&
241             ip->i_d.di_format != XFS_DINODE_FMT_DEV &&
242             ip->i_d.di_format != XFS_DINODE_FMT_UUID &&
243             ip->i_d.di_format != XFS_DINODE_FMT_BTREE) {
244                 uint    dfl_forkoff = xfs_default_attroffset(ip) >> 3;
245
246                 if (dfl_forkoff > ip->i_d.di_forkoff)
247                         ip->i_d.di_forkoff = dfl_forkoff;
248         }
249 }
250
251 #ifdef DEBUG
252 STATIC struct xfs_buf *
253 xfs_bmap_get_bp(
254         struct xfs_btree_cur    *cur,
255         xfs_fsblock_t           bno)
256 {
257         struct xfs_log_item_desc *lidp;
258         int                     i;
259
260         if (!cur)
261                 return NULL;
262
263         for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) {
264                 if (!cur->bc_bufs[i])
265                         break;
266                 if (XFS_BUF_ADDR(cur->bc_bufs[i]) == bno)
267                         return cur->bc_bufs[i];
268         }
269
270         /* Chase down all the log items to see if the bp is there */
271         list_for_each_entry(lidp, &cur->bc_tp->t_items, lid_trans) {
272                 struct xfs_buf_log_item *bip;
273                 bip = (struct xfs_buf_log_item *)lidp->lid_item;
274                 if (bip->bli_item.li_type == XFS_LI_BUF &&
275                     XFS_BUF_ADDR(bip->bli_buf) == bno)
276                         return bip->bli_buf;
277         }
278
279         return NULL;
280 }
281
282 STATIC void
283 xfs_check_block(
284         struct xfs_btree_block  *block,
285         xfs_mount_t             *mp,
286         int                     root,
287         short                   sz)
288 {
289         int                     i, j, dmxr;
290         __be64                  *pp, *thispa;   /* pointer to block address */
291         xfs_bmbt_key_t          *prevp, *keyp;
292
293         ASSERT(be16_to_cpu(block->bb_level) > 0);
294
295         prevp = NULL;
296         for( i = 1; i <= xfs_btree_get_numrecs(block); i++) {
297                 dmxr = mp->m_bmap_dmxr[0];
298                 keyp = XFS_BMBT_KEY_ADDR(mp, block, i);
299
300                 if (prevp) {
301                         ASSERT(be64_to_cpu(prevp->br_startoff) <
302                                be64_to_cpu(keyp->br_startoff));
303                 }
304                 prevp = keyp;
305
306                 /*
307                  * Compare the block numbers to see if there are dups.
308                  */
309                 if (root)
310                         pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, i, sz);
311                 else
312                         pp = XFS_BMBT_PTR_ADDR(mp, block, i, dmxr);
313
314                 for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
315                         if (root)
316                                 thispa = XFS_BMAP_BROOT_PTR_ADDR(mp, block, j, sz);
317                         else
318                                 thispa = XFS_BMBT_PTR_ADDR(mp, block, j, dmxr);
319                         if (*thispa == *pp) {
320                                 xfs_warn(mp, "%s: thispa(%d) == pp(%d) %Ld",
321                                         __func__, j, i,
322                                         (unsigned long long)be64_to_cpu(*thispa));
323                                 panic("%s: ptrs are equal in node\n",
324                                         __func__);
325                         }
326                 }
327         }
328 }
329
/*
 * Check that the extents for the inode ip are in the right order in all
 * btree leaves. This becomes prohibitively expensive for large extent count
 * files, so don't bother with inodes that have more than 10,000 extents in
 * them. The btree record ordering checks will still be done, so for such large
 * bmapbt constructs that is going to catch most corruptions.
 */
STATIC void
xfs_bmap_check_leaf_extents(
	xfs_btree_cur_t		*cur,	/* btree cursor or null */
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			whichfork)	/* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_extnum_t		i=0, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	xfs_bmbt_rec_t		*ep;	/* pointer to current extent */
	xfs_bmbt_rec_t		last = {0, 0}; /* last extent in prev block */
	xfs_bmbt_rec_t		*nextp;	/* pointer to next extent */
	int			bp_release = 0;	/* nonzero: we read bp, we release it */

	/* Only btree-format forks have leaves to check. */
	if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE) {
		return;
	}

	/* skip large extent count inodes */
	if (ip->i_d.di_nextents > 10000)
		return;

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	xfs_check_block(block, mp, 1, ifp->if_broot_bytes);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);

	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		/* See if buf is in cur first */
		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			/* Not held by the cursor: read it and release below. */
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;

		/*
		 * Check this block for basic sanity (increasing keys and
		 * no duplicate blocks).
		 */

		xfs_check_block(block, mp, 0, 0);
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		XFS_WANT_CORRUPTED_GOTO(mp,
					XFS_FSB_SANITY_CHECK(mp, bno), error0);
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
	}

	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	i = 0;

	/*
	 * Loop over all leaf nodes checking that all extents are in the right order.
	 */
	for (;;) {
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;


		num_recs = xfs_btree_get_numrecs(block);

		/*
		 * Read-ahead the next leaf block, if any.
		 */

		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);

		/*
		 * Check all the extents to make sure they are OK.
		 * If we had a previous block, the last entry should
		 * conform with the first entry in this one.
		 */

		ep = XFS_BMBT_REC_ADDR(mp, block, 1);
		if (i) {
			/* Extent carried over from the previous leaf must end
			 * at or before this leaf's first extent starts. */
			ASSERT(xfs_bmbt_disk_get_startoff(&last) +
			       xfs_bmbt_disk_get_blockcount(&last) <=
			       xfs_bmbt_disk_get_startoff(ep));
		}
		for (j = 1; j < num_recs; j++) {
			nextp = XFS_BMBT_REC_ADDR(mp, block, j + 1);
			ASSERT(xfs_bmbt_disk_get_startoff(ep) +
			       xfs_bmbt_disk_get_blockcount(ep) <=
			       xfs_bmbt_disk_get_startoff(nextp));
			ep = nextp;
		}

		last = *ep;
		i += num_recs;
		if (bp_release) {
			bp_release = 0;
			xfs_trans_brelse(NULL, bp);
		}
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;

		bp_release = 0;
		bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
		if (!bp) {
			bp_release = 1;
			error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				goto error_norelse;
		}
		block = XFS_BUF_TO_BLOCK(bp);
	}

	return;

error0:
	xfs_warn(mp, "%s: at error0", __func__);
	if (bp_release)
		xfs_trans_brelse(NULL, bp);
error_norelse:
	xfs_warn(mp, "%s: BAD after btree leaves for %d extents",
		__func__, i);
	panic("%s: CORRUPTED BTREE OR SOMETHING", __func__);
	return;
}
497
498 /*
499  * Add bmap trace insert entries for all the contents of the extent records.
500  */
501 void
502 xfs_bmap_trace_exlist(
503         xfs_inode_t     *ip,            /* incore inode pointer */
504         xfs_extnum_t    cnt,            /* count of entries in the list */
505         int             whichfork,      /* data or attr fork */
506         unsigned long   caller_ip)
507 {
508         xfs_extnum_t    idx;            /* extent record index */
509         xfs_ifork_t     *ifp;           /* inode fork pointer */
510         int             state = 0;
511
512         if (whichfork == XFS_ATTR_FORK)
513                 state |= BMAP_ATTRFORK;
514
515         ifp = XFS_IFORK_PTR(ip, whichfork);
516         ASSERT(cnt == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
517         for (idx = 0; idx < cnt; idx++)
518                 trace_xfs_extlist(ip, idx, whichfork, caller_ip);
519 }
520
521 /*
522  * Validate that the bmbt_irecs being returned from bmapi are valid
523  * given the caller's original parameters.  Specifically check the
524  * ranges of the returned irecs to ensure that they only extend beyond
525  * the given parameters if the XFS_BMAPI_ENTIRE flag was set.
526  */
527 STATIC void
528 xfs_bmap_validate_ret(
529         xfs_fileoff_t           bno,
530         xfs_filblks_t           len,
531         int                     flags,
532         xfs_bmbt_irec_t         *mval,
533         int                     nmap,
534         int                     ret_nmap)
535 {
536         int                     i;              /* index to map values */
537
538         ASSERT(ret_nmap <= nmap);
539
540         for (i = 0; i < ret_nmap; i++) {
541                 ASSERT(mval[i].br_blockcount > 0);
542                 if (!(flags & XFS_BMAPI_ENTIRE)) {
543                         ASSERT(mval[i].br_startoff >= bno);
544                         ASSERT(mval[i].br_blockcount <= len);
545                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount <=
546                                bno + len);
547                 } else {
548                         ASSERT(mval[i].br_startoff < bno + len);
549                         ASSERT(mval[i].br_startoff + mval[i].br_blockcount >
550                                bno);
551                 }
552                 ASSERT(i == 0 ||
553                        mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
554                        mval[i].br_startoff);
555                 ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
556                        mval[i].br_startblock != HOLESTARTBLOCK);
557                 ASSERT(mval[i].br_state == XFS_EXT_NORM ||
558                        mval[i].br_state == XFS_EXT_UNWRITTEN);
559         }
560 }
561
562 #else
563 #define xfs_bmap_check_leaf_extents(cur, ip, whichfork)         do { } while (0)
564 #define xfs_bmap_validate_ret(bno,len,flags,mval,onmap,nmap)
565 #endif /* DEBUG */
566
567 /*
568  * bmap free list manipulation functions
569  */
570
/*
 * Add the extent to the list of extents to be freed at transaction end.
 * The list is maintained sorted (by block number).
 */
void
xfs_bmap_add_free(
	struct xfs_mount		*mp,		/* mount structure */
	struct xfs_defer_ops		*dfops,		/* deferred-op list to append to */
	xfs_fsblock_t			bno,		/* first block of extent */
	xfs_filblks_t			len,		/* extent length in blocks */
	struct xfs_owner_info		*oinfo)		/* rmap owner, or NULL */
{
	struct xfs_extent_free_item	*new;		/* new element */
#ifdef DEBUG
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;

	/* Sanity-check: a real, non-delalloc extent contained in one AG. */
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= MAXEXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_bmap_free_item_zone != NULL);

	new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
	new->xefi_startblock = bno;
	new->xefi_blockcount = (xfs_extlen_t)len;
	/* No owner supplied: mark the item so rmap owner update is skipped. */
	if (oinfo)
		new->xefi_oinfo = *oinfo;
	else
		xfs_rmap_skip_owner_update(&new->xefi_oinfo);
	trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0,
			XFS_FSB_TO_AGBNO(mp, bno), len);
	/* Queue the free as a deferred op; processed at transaction roll. */
	xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
}
612
613 /*
614  * Inode fork format manipulation functions
615  */
616
/*
 * Transform a btree format file with only one leaf node, where the
 * extents list will fit in the inode, into an extents format file.
 * Since the file extents are already in-core, all we have to do is
 * give up the space for the btree root and pitch the leaf block.
 */
STATIC int				/* error */
xfs_bmap_btree_to_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			*logflagsp, /* inode logging flags */
	int			whichfork)  /* data or attr fork */
{
	/* REFERENCED */
	struct xfs_btree_block	*cblock;/* child btree block */
	xfs_fsblock_t		cbno;	/* child block number */
	xfs_buf_t		*cbp;	/* child block's buffer */
	int			error;	/* error return value */
	xfs_ifork_t		*ifp;	/* inode fork data */
	xfs_mount_t		*mp;	/* mount point structure */
	__be64			*pp;	/* ptr to block address */
	struct xfs_btree_block	*rblock;/* root btree block */
	struct xfs_owner_info	oinfo;	/* rmap owner of the freed block */

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(ifp->if_flags & XFS_IFEXTENTS);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
	rblock = ifp->if_broot;
	/* The conversion only applies to a root with exactly one leaf child. */
	ASSERT(be16_to_cpu(rblock->bb_level) == 1);
	ASSERT(be16_to_cpu(rblock->bb_numrecs) == 1);
	ASSERT(xfs_bmbt_maxrecs(mp, ifp->if_broot_bytes, 0) == 1);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
	cbno = be64_to_cpu(*pp);
	*logflagsp = 0;
#ifdef DEBUG
	if ((error = xfs_btree_check_lptr(cur, cbno, 1)))
		return error;
#endif
	/* Read in the sole child (leaf) block so it can be invalidated. */
	error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
				&xfs_bmbt_buf_ops);
	if (error)
		return error;
	cblock = XFS_BUF_TO_BLOCK(cbp);
	if ((error = xfs_btree_check_block(cur, cblock, 0, cbp)))
		return error;
	/* Defer freeing the leaf block, owned by this inode's bmbt. */
	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo);
	/* Account the lost block against the inode and its quota. */
	ip->i_d.di_nblocks--;
	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
	xfs_trans_binval(tp, cbp);
	/* Don't leave the cursor holding a stale reference to the buffer. */
	if (cur->bc_bufs[0] == cbp)
		cur->bc_bufs[0] = NULL;
	/* Give the btree root's space in the inode fork back. */
	xfs_iroot_realloc(ip, -1, whichfork);
	ASSERT(ifp->if_broot == NULL);
	ASSERT((ifp->if_flags & XFS_IFBROOT) == 0);
	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	*logflagsp = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
	return 0;
}
678
679 /*
680  * Convert an extents-format file into a btree-format file.
681  * The new file will have a root block (in the inode) and a single child block.
682  */
683 STATIC int                                      /* error */
684 xfs_bmap_extents_to_btree(
685         xfs_trans_t             *tp,            /* transaction pointer */
686         xfs_inode_t             *ip,            /* incore inode pointer */
687         xfs_fsblock_t           *firstblock,    /* first-block-allocated */
688         struct xfs_defer_ops    *dfops,         /* blocks freed in xaction */
689         xfs_btree_cur_t         **curp,         /* cursor returned to caller */
690         int                     wasdel,         /* converting a delayed alloc */
691         int                     *logflagsp,     /* inode logging flags */
692         int                     whichfork)      /* data or attr fork */
693 {
694         struct xfs_btree_block  *ablock;        /* allocated (child) bt block */
695         xfs_buf_t               *abp;           /* buffer for ablock */
696         xfs_alloc_arg_t         args;           /* allocation arguments */
697         xfs_bmbt_rec_t          *arp;           /* child record pointer */
698         struct xfs_btree_block  *block;         /* btree root block */
699         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
700         xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
701         int                     error;          /* error return value */
702         xfs_extnum_t            i, cnt;         /* extent record index */
703         xfs_ifork_t             *ifp;           /* inode fork pointer */
704         xfs_bmbt_key_t          *kp;            /* root block key pointer */
705         xfs_mount_t             *mp;            /* mount structure */
706         xfs_extnum_t            nextents;       /* number of file extents */
707         xfs_bmbt_ptr_t          *pp;            /* root block address pointer */
708
709         mp = ip->i_mount;
710         ifp = XFS_IFORK_PTR(ip, whichfork);
711         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);
712
713         /*
714          * Make space in the inode incore.
715          */
716         xfs_iroot_realloc(ip, 1, whichfork);
717         ifp->if_flags |= XFS_IFBROOT;
718
719         /*
720          * Fill in the root.
721          */
722         block = ifp->if_broot;
723         if (xfs_sb_version_hascrc(&mp->m_sb))
724                 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
725                                  XFS_BMAP_CRC_MAGIC, 1, 1, ip->i_ino,
726                                  XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
727         else
728                 xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL,
729                                  XFS_BMAP_MAGIC, 1, 1, ip->i_ino,
730                                  XFS_BTREE_LONG_PTRS);
731
732         /*
733          * Need a cursor.  Can't allocate until bb_level is filled in.
734          */
735         cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
736         cur->bc_private.b.firstblock = *firstblock;
737         cur->bc_private.b.dfops = dfops;
738         cur->bc_private.b.flags = wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
739         /*
740          * Convert to a btree with two levels, one record in root.
741          */
742         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
743         memset(&args, 0, sizeof(args));
744         args.tp = tp;
745         args.mp = mp;
746         xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork);
747         args.firstblock = *firstblock;
748         if (*firstblock == NULLFSBLOCK) {
749                 args.type = XFS_ALLOCTYPE_START_BNO;
750                 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
751         } else if (dfops->dop_low) {
752                 args.type = XFS_ALLOCTYPE_START_BNO;
753                 args.fsbno = *firstblock;
754         } else {
755                 args.type = XFS_ALLOCTYPE_NEAR_BNO;
756                 args.fsbno = *firstblock;
757         }
758         args.minlen = args.maxlen = args.prod = 1;
759         args.wasdel = wasdel;
760         *logflagsp = 0;
761         if ((error = xfs_alloc_vextent(&args))) {
762                 xfs_iroot_realloc(ip, -1, whichfork);
763                 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
764                 return error;
765         }
766         /*
767          * Allocation can't fail, the space was reserved.
768          */
769         ASSERT(args.fsbno != NULLFSBLOCK);
770         ASSERT(*firstblock == NULLFSBLOCK ||
771                args.agno == XFS_FSB_TO_AGNO(mp, *firstblock) ||
772                (dfops->dop_low &&
773                 args.agno > XFS_FSB_TO_AGNO(mp, *firstblock)));
774         *firstblock = cur->bc_private.b.firstblock = args.fsbno;
775         cur->bc_private.b.allocated++;
776         ip->i_d.di_nblocks++;
777         xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
778         abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
779         /*
780          * Fill in the child block.
781          */
782         abp->b_ops = &xfs_bmbt_buf_ops;
783         ablock = XFS_BUF_TO_BLOCK(abp);
784         if (xfs_sb_version_hascrc(&mp->m_sb))
785                 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
786                                 XFS_BMAP_CRC_MAGIC, 0, 0, ip->i_ino,
787                                 XFS_BTREE_LONG_PTRS | XFS_BTREE_CRC_BLOCKS);
788         else
789                 xfs_btree_init_block_int(mp, ablock, abp->b_bn,
790                                 XFS_BMAP_MAGIC, 0, 0, ip->i_ino,
791                                 XFS_BTREE_LONG_PTRS);
792
793         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
794         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
795         for (cnt = i = 0; i < nextents; i++) {
796                 ep = xfs_iext_get_ext(ifp, i);
797                 if (!isnullstartblock(xfs_bmbt_get_startblock(ep))) {
798                         arp->l0 = cpu_to_be64(ep->l0);
799                         arp->l1 = cpu_to_be64(ep->l1);
800                         arp++; cnt++;
801                 }
802         }
803         ASSERT(cnt == XFS_IFORK_NEXTENTS(ip, whichfork));
804         xfs_btree_set_numrecs(ablock, cnt);
805
806         /*
807          * Fill in the root key and pointer.
808          */
809         kp = XFS_BMBT_KEY_ADDR(mp, block, 1);
810         arp = XFS_BMBT_REC_ADDR(mp, ablock, 1);
811         kp->br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(arp));
812         pp = XFS_BMBT_PTR_ADDR(mp, block, 1, xfs_bmbt_get_maxrecs(cur,
813                                                 be16_to_cpu(block->bb_level)));
814         *pp = cpu_to_be64(args.fsbno);
815
816         /*
817          * Do all this logging at the end so that
818          * the root is at the right level.
819          */
820         xfs_btree_log_block(cur, abp, XFS_BB_ALL_BITS);
821         xfs_btree_log_recs(cur, abp, 1, be16_to_cpu(ablock->bb_numrecs));
822         ASSERT(*curp == NULL);
823         *curp = cur;
824         *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);
825         return 0;
826 }
827
828 /*
829  * Convert a local file to an extents file.
830  * This code is out of bounds for data forks of regular files,
831  * since the file data needs to get logged so things will stay consistent.
832  * (The bmap-level manipulations are ok, though).
833  */
834 void
835 xfs_bmap_local_to_extents_empty(
836         struct xfs_inode        *ip,
837         int                     whichfork)
838 {
839         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
840
841         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
842         ASSERT(ifp->if_bytes == 0);
843         ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0);
844
845         xfs_bmap_forkoff_reset(ip, whichfork);
846         ifp->if_flags &= ~XFS_IFINLINE;
847         ifp->if_flags |= XFS_IFEXTENTS;
848         XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
849 }
850
851
/*
 * Convert a local (inline) format fork to extents format by allocating a
 * single block, letting init_fn copy/format the inline data into it, and
 * recording that block as the fork's only extent.  Returns 0 or a negative
 * errno; *logflagsp is set to the inode log flags the caller must log.
 */
STATIC int				/* error */
xfs_bmap_local_to_extents(
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fsblock_t	*firstblock,	/* first block allocated in xaction */
	xfs_extlen_t	total,		/* total blocks needed by transaction */
	int		*logflagsp,	/* inode logging flags */
	int		whichfork,	/* data or attr fork */
	void		(*init_fn)(struct xfs_trans *tp,
				   struct xfs_buf *bp,
				   struct xfs_inode *ip,
				   struct xfs_ifork *ifp))
{
	int		error = 0;
	int		flags;		/* logging flags returned */
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_alloc_arg_t	args;		/* allocation arguments */
	xfs_buf_t	*bp;		/* buffer for extent block */
	xfs_bmbt_rec_host_t *ep;	/* extent record pointer */

	/*
	 * We don't want to deal with the case of keeping inode data inline yet.
	 * So sending the data fork of a regular inode is invalid.
	 */
	ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
	ifp = XFS_IFORK_PTR(ip, whichfork);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);

	/* An empty fork converts in place; nothing to allocate or copy. */
	if (!ifp->if_bytes) {
		xfs_bmap_local_to_extents_empty(ip, whichfork);
		flags = XFS_ILOG_CORE;
		goto done;
	}

	flags = 0;
	error = 0;
	ASSERT((ifp->if_flags & (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) ==
								XFS_IFINLINE);
	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = ip->i_mount;
	xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0);
	args.firstblock = *firstblock;
	/*
	 * Allocate a block.  We know we need only one, since the
	 * file currently fits in an inode.
	 */
	if (*firstblock == NULLFSBLOCK) {
		/* No prior allocation this transaction: aim near the inode. */
		args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino);
		args.type = XFS_ALLOCTYPE_START_BNO;
	} else {
		args.fsbno = *firstblock;
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
	}
	args.total = total;
	args.minlen = args.maxlen = args.prod = 1;
	error = xfs_alloc_vextent(&args);
	if (error)
		goto done;

	/* Can't fail, the space was reserved. */
	ASSERT(args.fsbno != NULLFSBLOCK);
	ASSERT(args.len == 1);
	*firstblock = args.fsbno;
	bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);

	/*
	 * Initialize the block, copy the data and log the remote buffer.
	 *
	 * The callout is responsible for logging because the remote format
	 * might differ from the local format and thus we don't know how much to
	 * log here. Note that init_fn must also set the buffer log item type
	 * correctly.
	 */
	init_fn(tp, bp, ip, ifp);

	/* account for the change in fork size */
	xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
	xfs_bmap_local_to_extents_empty(ip, whichfork);
	flags |= XFS_ILOG_CORE;

	/* Insert the single extent record covering the new block. */
	xfs_iext_add(ifp, 0, 1);
	ep = xfs_iext_get_ext(ifp, 0);
	xfs_bmbt_set_allf(ep, 0, args.fsbno, 1, XFS_EXT_NORM);
	trace_xfs_bmap_post_update(ip, 0,
			whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0,
			_THIS_IP_);
	XFS_IFORK_NEXT_SET(ip, whichfork, 1);
	ip->i_d.di_nblocks = 1;
	/* Charge the newly allocated block against the inode's quota. */
	xfs_trans_mod_dquot_byino(tp, ip,
		XFS_TRANS_DQ_BCOUNT, 1L);
	flags |= xfs_ilog_fext(whichfork);

done:
	*logflagsp = flags;
	return error;
}
949
950 /*
951  * Called from xfs_bmap_add_attrfork to handle btree format files.
952  */
953 STATIC int                                      /* error */
954 xfs_bmap_add_attrfork_btree(
955         xfs_trans_t             *tp,            /* transaction pointer */
956         xfs_inode_t             *ip,            /* incore inode pointer */
957         xfs_fsblock_t           *firstblock,    /* first block allocated */
958         struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
959         int                     *flags)         /* inode logging flags */
960 {
961         xfs_btree_cur_t         *cur;           /* btree cursor */
962         int                     error;          /* error return value */
963         xfs_mount_t             *mp;            /* file system mount struct */
964         int                     stat;           /* newroot status */
965
966         mp = ip->i_mount;
967         if (ip->i_df.if_broot_bytes <= XFS_IFORK_DSIZE(ip))
968                 *flags |= XFS_ILOG_DBROOT;
969         else {
970                 cur = xfs_bmbt_init_cursor(mp, tp, ip, XFS_DATA_FORK);
971                 cur->bc_private.b.dfops = dfops;
972                 cur->bc_private.b.firstblock = *firstblock;
973                 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
974                         goto error0;
975                 /* must be at least one entry */
976                 XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
977                 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
978                         goto error0;
979                 if (stat == 0) {
980                         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
981                         return -ENOSPC;
982                 }
983                 *firstblock = cur->bc_private.b.firstblock;
984                 cur->bc_private.b.allocated = 0;
985                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
986         }
987         return 0;
988 error0:
989         xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
990         return error;
991 }
992
993 /*
994  * Called from xfs_bmap_add_attrfork to handle extents format files.
995  */
996 STATIC int                                      /* error */
997 xfs_bmap_add_attrfork_extents(
998         xfs_trans_t             *tp,            /* transaction pointer */
999         xfs_inode_t             *ip,            /* incore inode pointer */
1000         xfs_fsblock_t           *firstblock,    /* first block allocated */
1001         struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
1002         int                     *flags)         /* inode logging flags */
1003 {
1004         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
1005         int                     error;          /* error return value */
1006
1007         if (ip->i_d.di_nextents * sizeof(xfs_bmbt_rec_t) <= XFS_IFORK_DSIZE(ip))
1008                 return 0;
1009         cur = NULL;
1010         error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops, &cur, 0,
1011                 flags, XFS_DATA_FORK);
1012         if (cur) {
1013                 cur->bc_private.b.allocated = 0;
1014                 xfs_btree_del_cursor(cur,
1015                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
1016         }
1017         return error;
1018 }
1019
1020 /*
1021  * Called from xfs_bmap_add_attrfork to handle local format files. Each
1022  * different data fork content type needs a different callout to do the
1023  * conversion. Some are basic and only require special block initialisation
1024  * callouts for the data formating, others (directories) are so specialised they
1025  * handle everything themselves.
1026  *
1027  * XXX (dgc): investigate whether directory conversion can use the generic
1028  * formatting callout. It should be possible - it's just a very complex
1029  * formatter.
1030  */
1031 STATIC int                                      /* error */
1032 xfs_bmap_add_attrfork_local(
1033         xfs_trans_t             *tp,            /* transaction pointer */
1034         xfs_inode_t             *ip,            /* incore inode pointer */
1035         xfs_fsblock_t           *firstblock,    /* first block allocated */
1036         struct xfs_defer_ops    *dfops,         /* blocks to free at commit */
1037         int                     *flags)         /* inode logging flags */
1038 {
1039         xfs_da_args_t           dargs;          /* args for dir/attr code */
1040
1041         if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
1042                 return 0;
1043
1044         if (S_ISDIR(VFS_I(ip)->i_mode)) {
1045                 memset(&dargs, 0, sizeof(dargs));
1046                 dargs.geo = ip->i_mount->m_dir_geo;
1047                 dargs.dp = ip;
1048                 dargs.firstblock = firstblock;
1049                 dargs.dfops = dfops;
1050                 dargs.total = dargs.geo->fsbcount;
1051                 dargs.whichfork = XFS_DATA_FORK;
1052                 dargs.trans = tp;
1053                 return xfs_dir2_sf_to_block(&dargs);
1054         }
1055
1056         if (S_ISLNK(VFS_I(ip)->i_mode))
1057                 return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
1058                                                  flags, XFS_DATA_FORK,
1059                                                  xfs_symlink_local_to_remote);
1060
1061         /* should only be called for types that support local format data */
1062         ASSERT(0);
1063         return -EFSCORRUPTED;
1064 }
1065
1066 /*
1067  * Convert inode from non-attributed to attributed.
1068  * Must not be in a transaction, ip must not be locked.
1069  */
int						/* error code */
xfs_bmap_add_attrfork(
	xfs_inode_t		*ip,		/* incore inode pointer */
	int			size,		/* space new attribute needs */
	int			rsvd)		/* xact may use reserved blks */
{
	xfs_fsblock_t		firstblock;	/* 1st block/ag allocated */
	struct xfs_defer_ops	dfops;		/* freed extent records */
	xfs_mount_t		*mp;		/* mount structure */
	xfs_trans_t		*tp;		/* transaction pointer */
	int			blks;		/* space reservation */
	int			version = 1;	/* superblock attr version */
	int			logflags;	/* logging flags */
	int			error;		/* error return value */

	/* Caller must not already have an attribute fork. */
	ASSERT(XFS_IFORK_Q(ip) == 0);

	mp = ip->i_mount;
	ASSERT(!XFS_NOT_DQATTACHED(mp, ip));

	blks = XFS_ADDAFORK_SPACE_RES(mp);

	/* Allocate and reserve the transaction before taking the ilock. */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
			rsvd ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
			XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto trans_cancel;
	/* Recheck under the lock: someone may have added the fork already. */
	if (XFS_IFORK_Q(ip))
		goto trans_cancel;
	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {
		/*
		 * For inodes coming from pre-6.2 filesystems.
		 */
		ASSERT(ip->i_d.di_aformat == 0);
		ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
	}
	ASSERT(ip->i_d.di_anextents == 0);

	xfs_trans_ijoin(tp, ip, 0);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	/* Pick the attr fork offset based on the data fork's format. */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_DEV:
		ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_UUID:
		ip->i_d.di_forkoff = roundup(sizeof(uuid_t), 8) >> 3;
		break;
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
		if (!ip->i_d.di_forkoff)
			ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
		else if (mp->m_flags & XFS_MOUNT_ATTR2)
			/* Non-default offset requires the attr2 feature. */
			version = 2;
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
		goto trans_cancel;
	}

	/* Set up the new in-core attribute fork. */
	ASSERT(ip->i_afp == NULL);
	ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
	ip->i_afp->if_flags = XFS_IFEXTENTS;
	logflags = 0;
	xfs_defer_init(&dfops, &firstblock);
	/* Make room in the data fork according to its current format. */
	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_LOCAL:
		error = xfs_bmap_add_attrfork_local(tp, ip, &firstblock, &dfops,
			&logflags);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		error = xfs_bmap_add_attrfork_extents(tp, ip, &firstblock,
			&dfops, &logflags);
		break;
	case XFS_DINODE_FMT_BTREE:
		error = xfs_bmap_add_attrfork_btree(tp, ip, &firstblock, &dfops,
			&logflags);
		break;
	default:
		error = 0;
		break;
	}
	/* Log whatever the format handler changed, even on error. */
	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	if (error)
		goto bmap_cancel;
	/* Enable the attr (and possibly attr2) superblock feature bits. */
	if (!xfs_sb_version_hasattr(&mp->m_sb) ||
	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {
		bool log_sb = false;

		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasattr(&mp->m_sb)) {
			xfs_sb_version_addattr(&mp->m_sb);
			log_sb = true;
		}
		if (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2) {
			xfs_sb_version_addattr2(&mp->m_sb);
			log_sb = true;
		}
		spin_unlock(&mp->m_sb_lock);
		if (log_sb)
			xfs_log_sb(tp);
	}

	/* Finish any deferred ops, then commit the whole thing. */
	error = xfs_defer_finish(&tp, &dfops, NULL);
	if (error)
		goto bmap_cancel;
	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

bmap_cancel:
	xfs_defer_cancel(&dfops);
trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
1197
1198 /*
1199  * Internal and external extent tree search functions.
1200  */
1201
1202 /*
1203  * Read in the extents to if_extents.
1204  * All inode fields are set up by caller, we just traverse the btree
1205  * and copy the records in. If the file system cannot contain unwritten
1206  * extents, the records are checked for no "state" flags.
1207  */
int					/* error */
xfs_bmap_read_extents(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_inode_t		*ip,	/* incore inode */
	int			whichfork) /* data or attr fork */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_buf_t		*bp;	/* buffer for "block" */
	int			error;	/* error return value */
	xfs_exntfmt_t		exntf;	/* XFS_EXTFMT_NOSTATE, if checking */
	xfs_extnum_t		i, j;	/* index into the extents list */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */
	/* REFERENCED */
	xfs_extnum_t		room;	/* number of entries there's room for */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* Attr forks (and pre-unwritten-extent data forks) carry no state. */
	exntf = (whichfork != XFS_DATA_FORK) ? XFS_EXTFMT_NOSTATE :
					XFS_EXTFMT_INODE(ip);
	block = ifp->if_broot;
	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
	/*
	 * Go down the tree until leaf level is reached, following the first
	 * pointer (leftmost) at each level.
	 */
	while (level-- > 0) {
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
		if (level == 0)
			break;
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		/* Validate the child pointer before descending further. */
		XFS_WANT_CORRUPTED_GOTO(mp,
			XFS_FSB_SANITY_CHECK(mp, bno), error0);
		xfs_trans_brelse(tp, bp);
	}
	/*
	 * Here with bp and block set to the leftmost leaf node in the tree.
	 */
	room = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
	i = 0;
	/*
	 * Loop over all leaf nodes.  Copy information to the extent records.
	 */
	for (;;) {
		xfs_bmbt_rec_t	*frp;
		xfs_fsblock_t	nextbno;
		xfs_extnum_t	num_recs;
		xfs_extnum_t	start;

		num_recs = xfs_btree_get_numrecs(block);
		/* More records than the fork has room for means corruption. */
		if (unlikely(i + num_recs > room)) {
			ASSERT(i + num_recs <= room);
			xfs_warn(ip->i_mount,
				"corrupt dinode %Lu, (btree extents).",
				(unsigned long long) ip->i_ino);
			XFS_CORRUPTION_ERROR("xfs_bmap_read_extents(1)",
				XFS_ERRLEVEL_LOW, ip->i_mount, block);
			goto error0;
		}
		/*
		 * Read-ahead the next leaf block, if any.
		 */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		if (nextbno != NULLFSBLOCK)
			xfs_btree_reada_bufl(mp, nextbno, 1,
					     &xfs_bmbt_buf_ops);
		/*
		 * Copy records into the extent records.
		 */
		frp = XFS_BMBT_REC_ADDR(mp, block, 1);
		start = i;
		for (j = 0; j < num_recs; j++, i++, frp++) {
			xfs_bmbt_rec_host_t *trp = xfs_iext_get_ext(ifp, i);
			trp->l0 = be64_to_cpu(frp->l0);
			trp->l1 = be64_to_cpu(frp->l1);
		}
		if (exntf == XFS_EXTFMT_NOSTATE) {
			/*
			 * Check all attribute bmap btree records and
			 * any "older" data bmap btree records for a
			 * set bit in the "extent flag" position.
			 */
			if (unlikely(xfs_check_nostate_extents(ifp,
					start, num_recs))) {
				XFS_ERROR_REPORT("xfs_bmap_read_extents(2)",
						 XFS_ERRLEVEL_LOW,
						 ip->i_mount);
				goto error0;
			}
		}
		xfs_trans_brelse(tp, bp);
		bno = nextbno;
		/*
		 * If we've reached the end, stop.
		 */
		if (bno == NULLFSBLOCK)
			break;
		error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
				XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
		if (error)
			return error;
		block = XFS_BUF_TO_BLOCK(bp);
	}
	ASSERT(i == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)));
	ASSERT(i == XFS_IFORK_NEXTENTS(ip, whichfork));
	XFS_BMAP_TRACE_EXLIST(ip, i, whichfork);
	return 0;
error0:
	/* Drop the current leaf buffer before reporting corruption. */
	xfs_trans_brelse(tp, bp);
	return -EFSCORRUPTED;
}
1337
1338
1339 /*
1340  * Search the extent records for the entry containing block bno.
1341  * If bno lies in a hole, point to the next entry.  If bno lies
1342  * past eof, *eofp will be set, and *prevp will contain the last
1343  * entry (null if none).  Else, *lastxp will be set to the index
1344  * of the found entry; *gotp will contain the entry.
1345  */
1346 STATIC xfs_bmbt_rec_host_t *            /* pointer to found extent entry */
1347 xfs_bmap_search_multi_extents(
1348         xfs_ifork_t     *ifp,           /* inode fork pointer */
1349         xfs_fileoff_t   bno,            /* block number searched for */
1350         int             *eofp,          /* out: end of file found */
1351         xfs_extnum_t    *lastxp,        /* out: last extent index */
1352         xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
1353         xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
1354 {
1355         xfs_bmbt_rec_host_t *ep;                /* extent record pointer */
1356         xfs_extnum_t    lastx;          /* last extent index */
1357
1358         /*
1359          * Initialize the extent entry structure to catch access to
1360          * uninitialized br_startblock field.
1361          */
1362         gotp->br_startoff = 0xffa5a5a5a5a5a5a5LL;
1363         gotp->br_blockcount = 0xa55a5a5a5a5a5a5aLL;
1364         gotp->br_state = XFS_EXT_INVALID;
1365         gotp->br_startblock = 0xffffa5a5a5a5a5a5LL;
1366         prevp->br_startoff = NULLFILEOFF;
1367
1368         ep = xfs_iext_bno_to_ext(ifp, bno, &lastx);
1369         if (lastx > 0) {
1370                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx - 1), prevp);
1371         }
1372         if (lastx < (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))) {
1373                 xfs_bmbt_get_all(ep, gotp);
1374                 *eofp = 0;
1375         } else {
1376                 if (lastx > 0) {
1377                         *gotp = *prevp;
1378                 }
1379                 *eofp = 1;
1380                 ep = NULL;
1381         }
1382         *lastxp = lastx;
1383         return ep;
1384 }
1385
1386 /*
1387  * Search the extents list for the inode, for the extent containing bno.
1388  * If bno lies in a hole, point to the next entry.  If bno lies past eof,
1389  * *eofp will be set, and *prevp will contain the last entry (null if none).
1390  * Else, *lastxp will be set to the index of the found
1391  * entry; *gotp will contain the entry.
1392  */
xfs_bmbt_rec_host_t *                 /* pointer to found extent entry */
xfs_bmap_search_extents(
	xfs_inode_t	*ip,		/* incore inode pointer */
	xfs_fileoff_t	bno,		/* block number searched for */
	int		fork,		/* data or attr fork */
	int		*eofp,		/* out: end of file found */
	xfs_extnum_t	*lastxp,	/* out: last extent index */
	xfs_bmbt_irec_t	*gotp,		/* out: extent entry found */
	xfs_bmbt_irec_t	*prevp)		/* out: previous extent entry found */
{
	xfs_ifork_t	*ifp;		/* inode fork pointer */
	xfs_bmbt_rec_host_t  *ep;            /* extent record pointer */

	XFS_STATS_INC(ip->i_mount, xs_look_exlist);
	ifp = XFS_IFORK_PTR(ip, fork);

	ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);

	/*
	 * A mapping starting at block zero is invalid for anything but the
	 * realtime data fork; alert and return "not found" rather than hand
	 * the caller a bogus extent.
	 */
	if (unlikely(!(gotp->br_startblock) && (*lastxp != NULLEXTNUM) &&
		     !(XFS_IS_REALTIME_INODE(ip) && fork == XFS_DATA_FORK))) {
		xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
				"Access to block zero in inode %llu "
				"start_block: %llx start_off: %llx "
				"blkcnt: %llx extent-state: %x lastx: %x",
			(unsigned long long)ip->i_ino,
			(unsigned long long)gotp->br_startblock,
			(unsigned long long)gotp->br_startoff,
			(unsigned long long)gotp->br_blockcount,
			gotp->br_state, *lastxp);
		*lastxp = NULLEXTNUM;
		*eofp = 1;
		return NULL;
	}
	return ep;
}
1428
1429 /*
1430  * Returns the file-relative block number of the first unused block(s)
1431  * in the file with at least "len" logically contiguous blocks free.
1432  * This is the lowest-address hole if the file has holes, else the first block
1433  * past the end of file.
1434  * Return 0 if the file is currently local (in-inode).
1435  */
1436 int                                             /* error */
1437 xfs_bmap_first_unused(
1438         xfs_trans_t     *tp,                    /* transaction pointer */
1439         xfs_inode_t     *ip,                    /* incore inode */
1440         xfs_extlen_t    len,                    /* size of hole to find */
1441         xfs_fileoff_t   *first_unused,          /* unused block */
1442         int             whichfork)              /* data or attr fork */
1443 {
1444         int             error;                  /* error return value */
1445         int             idx;                    /* extent record index */
1446         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1447         xfs_fileoff_t   lastaddr;               /* last block number seen */
1448         xfs_fileoff_t   lowest;                 /* lowest useful block */
1449         xfs_fileoff_t   max;                    /* starting useful block */
1450         xfs_fileoff_t   off;                    /* offset for this block */
1451         xfs_extnum_t    nextents;               /* number of extent entries */
1452
1453         ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
1454                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
1455                XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
1456         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1457                 *first_unused = 0;
1458                 return 0;
1459         }
1460         ifp = XFS_IFORK_PTR(ip, whichfork);
1461         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1462             (error = xfs_iread_extents(tp, ip, whichfork)))
1463                 return error;
1464         lowest = *first_unused;
1465         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1466         for (idx = 0, lastaddr = 0, max = lowest; idx < nextents; idx++) {
1467                 xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, idx);
1468                 off = xfs_bmbt_get_startoff(ep);
1469                 /*
1470                  * See if the hole before this extent will work.
1471                  */
1472                 if (off >= lowest + len && off - max >= len) {
1473                         *first_unused = max;
1474                         return 0;
1475                 }
1476                 lastaddr = off + xfs_bmbt_get_blockcount(ep);
1477                 max = XFS_FILEOFF_MAX(lastaddr, lowest);
1478         }
1479         *first_unused = max;
1480         return 0;
1481 }
1482
1483 /*
1484  * Returns the file-relative block number of the last block - 1 before
1485  * last_block (input value) in the file.
1486  * This is not based on i_size, it is based on the extent records.
1487  * Returns 0 for local files, as they do not have extent records.
1488  */
1489 int                                             /* error */
1490 xfs_bmap_last_before(
1491         xfs_trans_t     *tp,                    /* transaction pointer */
1492         xfs_inode_t     *ip,                    /* incore inode */
1493         xfs_fileoff_t   *last_block,            /* last block */
1494         int             whichfork)              /* data or attr fork */
1495 {
1496         xfs_fileoff_t   bno;                    /* input file offset */
1497         int             eof;                    /* hit end of file */
1498         xfs_bmbt_rec_host_t *ep;                /* pointer to last extent */
1499         int             error;                  /* error return value */
1500         xfs_bmbt_irec_t got;                    /* current extent value */
1501         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1502         xfs_extnum_t    lastx;                  /* last extent used */
1503         xfs_bmbt_irec_t prev;                   /* previous extent value */
1504
1505         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1506             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
1507             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
1508                return -EIO;
1509         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
1510                 *last_block = 0;
1511                 return 0;
1512         }
1513         ifp = XFS_IFORK_PTR(ip, whichfork);
1514         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
1515             (error = xfs_iread_extents(tp, ip, whichfork)))
1516                 return error;
1517         bno = *last_block - 1;
1518         ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
1519                 &prev);
1520         if (eof || xfs_bmbt_get_startoff(ep) > bno) {
1521                 if (prev.br_startoff == NULLFILEOFF)
1522                         *last_block = 0;
1523                 else
1524                         *last_block = prev.br_startoff + prev.br_blockcount;
1525         }
1526         /*
1527          * Otherwise *last_block is already the right answer.
1528          */
1529         return 0;
1530 }
1531
1532 int
1533 xfs_bmap_last_extent(
1534         struct xfs_trans        *tp,
1535         struct xfs_inode        *ip,
1536         int                     whichfork,
1537         struct xfs_bmbt_irec    *rec,
1538         int                     *is_empty)
1539 {
1540         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
1541         int                     error;
1542         int                     nextents;
1543
1544         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1545                 error = xfs_iread_extents(tp, ip, whichfork);
1546                 if (error)
1547                         return error;
1548         }
1549
1550         nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1551         if (nextents == 0) {
1552                 *is_empty = 1;
1553                 return 0;
1554         }
1555
1556         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
1557         *is_empty = 0;
1558         return 0;
1559 }
1560
1561 /*
1562  * Check the last inode extent to determine whether this allocation will result
1563  * in blocks being allocated at the end of the file. When we allocate new data
1564  * blocks at the end of the file which do not start at the previous data block,
1565  * we will try to align the new blocks at stripe unit boundaries.
1566  *
1567  * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be
1568  * at, or past the EOF.
1569  */
1570 STATIC int
1571 xfs_bmap_isaeof(
1572         struct xfs_bmalloca     *bma,
1573         int                     whichfork)
1574 {
1575         struct xfs_bmbt_irec    rec;
1576         int                     is_empty;
1577         int                     error;
1578
1579         bma->aeof = 0;
1580         error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
1581                                      &is_empty);
1582         if (error)
1583                 return error;
1584
1585         if (is_empty) {
1586                 bma->aeof = 1;
1587                 return 0;
1588         }
1589
1590         /*
1591          * Check if we are allocation or past the last extent, or at least into
1592          * the last delayed allocated extent.
1593          */
1594         bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
1595                 (bma->offset >= rec.br_startoff &&
1596                  isnullstartblock(rec.br_startblock));
1597         return 0;
1598 }
1599
1600 /*
1601  * Returns the file-relative block number of the first block past eof in
1602  * the file.  This is not based on i_size, it is based on the extent records.
1603  * Returns 0 for local files, as they do not have extent records.
1604  */
1605 int
1606 xfs_bmap_last_offset(
1607         struct xfs_inode        *ip,
1608         xfs_fileoff_t           *last_block,
1609         int                     whichfork)
1610 {
1611         struct xfs_bmbt_irec    rec;
1612         int                     is_empty;
1613         int                     error;
1614
1615         *last_block = 0;
1616
1617         if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
1618                 return 0;
1619
1620         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
1621             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1622                return -EIO;
1623
1624         error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
1625         if (error || is_empty)
1626                 return error;
1627
1628         *last_block = rec.br_startoff + rec.br_blockcount;
1629         return 0;
1630 }
1631
1632 /*
1633  * Returns whether the selected fork of the inode has exactly one
1634  * block or not.  For the data fork we check this matches di_size,
1635  * implying the file's range is 0..bsize-1.
1636  */
1637 int                                     /* 1=>1 block, 0=>otherwise */
1638 xfs_bmap_one_block(
1639         xfs_inode_t     *ip,            /* incore inode */
1640         int             whichfork)      /* data or attr fork */
1641 {
1642         xfs_bmbt_rec_host_t *ep;        /* ptr to fork's extent */
1643         xfs_ifork_t     *ifp;           /* inode fork pointer */
1644         int             rval;           /* return value */
1645         xfs_bmbt_irec_t s;              /* internal version of extent */
1646
1647 #ifndef DEBUG
1648         if (whichfork == XFS_DATA_FORK)
1649                 return XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize;
1650 #endif  /* !DEBUG */
1651         if (XFS_IFORK_NEXTENTS(ip, whichfork) != 1)
1652                 return 0;
1653         if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
1654                 return 0;
1655         ifp = XFS_IFORK_PTR(ip, whichfork);
1656         ASSERT(ifp->if_flags & XFS_IFEXTENTS);
1657         ep = xfs_iext_get_ext(ifp, 0);
1658         xfs_bmbt_get_all(ep, &s);
1659         rval = s.br_startoff == 0 && s.br_blockcount == 1;
1660         if (rval && whichfork == XFS_DATA_FORK)
1661                 ASSERT(XFS_ISIZE(ip) == ip->i_mount->m_sb.sb_blocksize);
1662         return rval;
1663 }
1664
1665 /*
1666  * Extent tree manipulation functions used during allocation.
1667  */
1668
1669 /*
1670  * Convert a delayed allocation to a real allocation.
1671  */
1672 STATIC int                              /* error */
1673 xfs_bmap_add_extent_delay_real(
1674         struct xfs_bmalloca     *bma)
1675 {
1676         struct xfs_bmbt_irec    *new = &bma->got;
1677         int                     diff;   /* temp value */
1678         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
1679         int                     error;  /* error return value */
1680         int                     i;      /* temp state */
1681         xfs_ifork_t             *ifp;   /* inode fork pointer */
1682         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
1683         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
1684                                         /* left is 0, right is 1, prev is 2 */
1685         int                     rval=0; /* return value (logging flags) */
1686         int                     state = 0;/* state bits, accessed thru macros */
1687         xfs_filblks_t           da_new; /* new count del alloc blocks used */
1688         xfs_filblks_t           da_old; /* old count del alloc blocks used */
1689         xfs_filblks_t           temp=0; /* value for da_new calculations */
1690         xfs_filblks_t           temp2=0;/* value for da_new calculations */
1691         int                     tmp_rval;       /* partial logging flags */
1692         int                     whichfork = XFS_DATA_FORK;
1693         struct xfs_mount        *mp;
1694
1695         mp = bma->ip->i_mount;
1696         ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1697
1698         ASSERT(bma->idx >= 0);
1699         ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
1700         ASSERT(!isnullstartblock(new->br_startblock));
1701         ASSERT(!bma->cur ||
1702                (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
1703
1704         XFS_STATS_INC(mp, xs_add_exlist);
1705
1706 #define LEFT            r[0]
1707 #define RIGHT           r[1]
1708 #define PREV            r[2]
1709
1710         /*
1711          * Set up a bunch of variables to make the tests simpler.
1712          */
1713         ep = xfs_iext_get_ext(ifp, bma->idx);
1714         xfs_bmbt_get_all(ep, &PREV);
1715         new_endoff = new->br_startoff + new->br_blockcount;
1716         ASSERT(PREV.br_startoff <= new->br_startoff);
1717         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
1718
1719         da_old = startblockval(PREV.br_startblock);
1720         da_new = 0;
1721
1722         /*
1723          * Set flags determining what part of the previous delayed allocation
1724          * extent is being replaced by a real allocation.
1725          */
1726         if (PREV.br_startoff == new->br_startoff)
1727                 state |= BMAP_LEFT_FILLING;
1728         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
1729                 state |= BMAP_RIGHT_FILLING;
1730
1731         /*
1732          * Check and set flags if this segment has a left neighbor.
1733          * Don't set contiguous if the combined extent would be too large.
1734          */
1735         if (bma->idx > 0) {
1736                 state |= BMAP_LEFT_VALID;
1737                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
1738
1739                 if (isnullstartblock(LEFT.br_startblock))
1740                         state |= BMAP_LEFT_DELAY;
1741         }
1742
1743         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
1744             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
1745             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
1746             LEFT.br_state == new->br_state &&
1747             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
1748                 state |= BMAP_LEFT_CONTIG;
1749
1750         /*
1751          * Check and set flags if this segment has a right neighbor.
1752          * Don't set contiguous if the combined extent would be too large.
1753          * Also check for all-three-contiguous being too large.
1754          */
1755         if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1756                 state |= BMAP_RIGHT_VALID;
1757                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
1758
1759                 if (isnullstartblock(RIGHT.br_startblock))
1760                         state |= BMAP_RIGHT_DELAY;
1761         }
1762
1763         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
1764             new_endoff == RIGHT.br_startoff &&
1765             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
1766             new->br_state == RIGHT.br_state &&
1767             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
1768             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1769                        BMAP_RIGHT_FILLING)) !=
1770                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
1771                        BMAP_RIGHT_FILLING) ||
1772              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1773                         <= MAXEXTLEN))
1774                 state |= BMAP_RIGHT_CONTIG;
1775
1776         error = 0;
1777         /*
1778          * Switch out based on the FILLING and CONTIG state bits.
1779          */
1780         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1781                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
1782         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
1783              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1784                 /*
1785                  * Filling in all of a previously delayed allocation extent.
1786                  * The left and right neighbors are both contiguous with new.
1787                  */
1788                 bma->idx--;
1789                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1790                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1791                         LEFT.br_blockcount + PREV.br_blockcount +
1792                         RIGHT.br_blockcount);
1793                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1794
1795                 xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
1796                 bma->ip->i_d.di_nextents--;
1797                 if (bma->cur == NULL)
1798                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1799                 else {
1800                         rval = XFS_ILOG_CORE;
1801                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1802                                         RIGHT.br_startblock,
1803                                         RIGHT.br_blockcount, &i);
1804                         if (error)
1805                                 goto done;
1806                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1807                         error = xfs_btree_delete(bma->cur, &i);
1808                         if (error)
1809                                 goto done;
1810                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1811                         error = xfs_btree_decrement(bma->cur, 0, &i);
1812                         if (error)
1813                                 goto done;
1814                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1815                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1816                                         LEFT.br_startblock,
1817                                         LEFT.br_blockcount +
1818                                         PREV.br_blockcount +
1819                                         RIGHT.br_blockcount, LEFT.br_state);
1820                         if (error)
1821                                 goto done;
1822                 }
1823                 break;
1824
1825         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
1826                 /*
1827                  * Filling in all of a previously delayed allocation extent.
1828                  * The left neighbor is contiguous, the right is not.
1829                  */
1830                 bma->idx--;
1831
1832                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1833                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
1834                         LEFT.br_blockcount + PREV.br_blockcount);
1835                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1836
1837                 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1838                 if (bma->cur == NULL)
1839                         rval = XFS_ILOG_DEXT;
1840                 else {
1841                         rval = 0;
1842                         error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1843                                         LEFT.br_startblock, LEFT.br_blockcount,
1844                                         &i);
1845                         if (error)
1846                                 goto done;
1847                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1848                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1849                                         LEFT.br_startblock,
1850                                         LEFT.br_blockcount +
1851                                         PREV.br_blockcount, LEFT.br_state);
1852                         if (error)
1853                                 goto done;
1854                 }
1855                 break;
1856
1857         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
1858                 /*
1859                  * Filling in all of a previously delayed allocation extent.
1860                  * The right neighbor is contiguous, the left is not.
1861                  */
1862                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1863                 xfs_bmbt_set_startblock(ep, new->br_startblock);
1864                 xfs_bmbt_set_blockcount(ep,
1865                         PREV.br_blockcount + RIGHT.br_blockcount);
1866                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1867
1868                 xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
1869                 if (bma->cur == NULL)
1870                         rval = XFS_ILOG_DEXT;
1871                 else {
1872                         rval = 0;
1873                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
1874                                         RIGHT.br_startblock,
1875                                         RIGHT.br_blockcount, &i);
1876                         if (error)
1877                                 goto done;
1878                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1879                         error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
1880                                         new->br_startblock,
1881                                         PREV.br_blockcount +
1882                                         RIGHT.br_blockcount, PREV.br_state);
1883                         if (error)
1884                                 goto done;
1885                 }
1886                 break;
1887
1888         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
1889                 /*
1890                  * Filling in all of a previously delayed allocation extent.
1891                  * Neither the left nor right neighbors are contiguous with
1892                  * the new one.
1893                  */
1894                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1895                 xfs_bmbt_set_startblock(ep, new->br_startblock);
1896                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1897
1898                 bma->ip->i_d.di_nextents++;
1899                 if (bma->cur == NULL)
1900                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1901                 else {
1902                         rval = XFS_ILOG_CORE;
1903                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
1904                                         new->br_startblock, new->br_blockcount,
1905                                         &i);
1906                         if (error)
1907                                 goto done;
1908                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1909                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1910                         error = xfs_btree_insert(bma->cur, &i);
1911                         if (error)
1912                                 goto done;
1913                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1914                 }
1915                 break;
1916
1917         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
1918                 /*
1919                  * Filling in the first part of a previous delayed allocation.
1920                  * The left neighbor is contiguous.
1921                  */
1922                 trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1923                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
1924                         LEFT.br_blockcount + new->br_blockcount);
1925                 xfs_bmbt_set_startoff(ep,
1926                         PREV.br_startoff + new->br_blockcount);
1927                 trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
1928
1929                 temp = PREV.br_blockcount - new->br_blockcount;
1930                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1931                 xfs_bmbt_set_blockcount(ep, temp);
1932                 if (bma->cur == NULL)
1933                         rval = XFS_ILOG_DEXT;
1934                 else {
1935                         rval = 0;
1936                         error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
1937                                         LEFT.br_startblock, LEFT.br_blockcount,
1938                                         &i);
1939                         if (error)
1940                                 goto done;
1941                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1942                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
1943                                         LEFT.br_startblock,
1944                                         LEFT.br_blockcount +
1945                                         new->br_blockcount,
1946                                         LEFT.br_state);
1947                         if (error)
1948                                 goto done;
1949                 }
1950                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1951                         startblockval(PREV.br_startblock));
1952                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
1953                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1954
1955                 bma->idx--;
1956                 break;
1957
1958         case BMAP_LEFT_FILLING:
1959                 /*
1960                  * Filling in the first part of a previous delayed allocation.
1961                  * The left neighbor is not contiguous.
1962                  */
1963                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1964                 xfs_bmbt_set_startoff(ep, new_endoff);
1965                 temp = PREV.br_blockcount - new->br_blockcount;
1966                 xfs_bmbt_set_blockcount(ep, temp);
1967                 xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
1968                 bma->ip->i_d.di_nextents++;
1969                 if (bma->cur == NULL)
1970                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
1971                 else {
1972                         rval = XFS_ILOG_CORE;
1973                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
1974                                         new->br_startblock, new->br_blockcount,
1975                                         &i);
1976                         if (error)
1977                                 goto done;
1978                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
1979                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
1980                         error = xfs_btree_insert(bma->cur, &i);
1981                         if (error)
1982                                 goto done;
1983                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
1984                 }
1985
1986                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
1987                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
1988                                         bma->firstblock, bma->dfops,
1989                                         &bma->cur, 1, &tmp_rval, whichfork);
1990                         rval |= tmp_rval;
1991                         if (error)
1992                                 goto done;
1993                 }
1994                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
1995                         startblockval(PREV.br_startblock) -
1996                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
1997                 ep = xfs_iext_get_ext(ifp, bma->idx + 1);
1998                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
1999                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2000                 break;
2001
2002         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2003                 /*
2004                  * Filling in the last part of a previous delayed allocation.
2005                  * The right neighbor is contiguous with the new allocation.
2006                  */
2007                 temp = PREV.br_blockcount - new->br_blockcount;
2008                 trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2009                 xfs_bmbt_set_blockcount(ep, temp);
2010                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
2011                         new->br_startoff, new->br_startblock,
2012                         new->br_blockcount + RIGHT.br_blockcount,
2013                         RIGHT.br_state);
2014                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
2015                 if (bma->cur == NULL)
2016                         rval = XFS_ILOG_DEXT;
2017                 else {
2018                         rval = 0;
2019                         error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
2020                                         RIGHT.br_startblock,
2021                                         RIGHT.br_blockcount, &i);
2022                         if (error)
2023                                 goto done;
2024                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2025                         error = xfs_bmbt_update(bma->cur, new->br_startoff,
2026                                         new->br_startblock,
2027                                         new->br_blockcount +
2028                                         RIGHT.br_blockcount,
2029                                         RIGHT.br_state);
2030                         if (error)
2031                                 goto done;
2032                 }
2033
2034                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2035                         startblockval(PREV.br_startblock));
2036                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2037                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2038                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2039
2040                 bma->idx++;
2041                 break;
2042
2043         case BMAP_RIGHT_FILLING:
2044                 /*
2045                  * Filling in the last part of a previous delayed allocation.
2046                  * The right neighbor is not contiguous.
2047                  */
2048                 temp = PREV.br_blockcount - new->br_blockcount;
2049                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
2050                 xfs_bmbt_set_blockcount(ep, temp);
2051                 xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
2052                 bma->ip->i_d.di_nextents++;
2053                 if (bma->cur == NULL)
2054                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2055                 else {
2056                         rval = XFS_ILOG_CORE;
2057                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2058                                         new->br_startblock, new->br_blockcount,
2059                                         &i);
2060                         if (error)
2061                                 goto done;
2062                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2063                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2064                         error = xfs_btree_insert(bma->cur, &i);
2065                         if (error)
2066                                 goto done;
2067                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2068                 }
2069
2070                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2071                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2072                                 bma->firstblock, bma->dfops, &bma->cur, 1,
2073                                 &tmp_rval, whichfork);
2074                         rval |= tmp_rval;
2075                         if (error)
2076                                 goto done;
2077                 }
2078                 da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
2079                         startblockval(PREV.br_startblock) -
2080                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2081                 ep = xfs_iext_get_ext(ifp, bma->idx);
2082                 xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
2083                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2084
2085                 bma->idx++;
2086                 break;
2087
2088         case 0:
2089                 /*
2090                  * Filling in the middle part of a previous delayed allocation.
2091                  * Contiguity is impossible here.
2092                  * This case is avoided almost all the time.
2093                  *
2094                  * We start with a delayed allocation:
2095                  *
2096                  * +ddddddddddddddddddddddddddddddddddddddddddddddddddddddd+
2097                  *  PREV @ idx
2098                  *
2099                  * and we are allocating:
2100                  *                     +rrrrrrrrrrrrrrrrr+
2101                  *                            new
2102                  *
2103                  * and we set it up for insertion as:
2104                  * +ddddddddddddddddddd+rrrrrrrrrrrrrrrrr+ddddddddddddddddd+
2105                  *                            new
2106                  *  PREV @ idx          LEFT              RIGHT
2107                  *                      inserted at idx + 1
2108                  */
2109                 temp = new->br_startoff - PREV.br_startoff;
2110                 temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
2111                 trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
2112                 xfs_bmbt_set_blockcount(ep, temp);      /* truncate PREV */
2113                 LEFT = *new;
2114                 RIGHT.br_state = PREV.br_state;
2115                 RIGHT.br_startblock = nullstartblock(
2116                                 (int)xfs_bmap_worst_indlen(bma->ip, temp2));
2117                 RIGHT.br_startoff = new_endoff;
2118                 RIGHT.br_blockcount = temp2;
2119                 /* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
2120                 xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
2121                 bma->ip->i_d.di_nextents++;
2122                 if (bma->cur == NULL)
2123                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2124                 else {
2125                         rval = XFS_ILOG_CORE;
2126                         error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
2127                                         new->br_startblock, new->br_blockcount,
2128                                         &i);
2129                         if (error)
2130                                 goto done;
2131                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2132                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
2133                         error = xfs_btree_insert(bma->cur, &i);
2134                         if (error)
2135                                 goto done;
2136                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2137                 }
2138
2139                 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2140                         error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2141                                         bma->firstblock, bma->dfops, &bma->cur,
2142                                         1, &tmp_rval, whichfork);
2143                         rval |= tmp_rval;
2144                         if (error)
2145                                 goto done;
2146                 }
2147                 temp = xfs_bmap_worst_indlen(bma->ip, temp);
2148                 temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
2149                 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
2150                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2151                 if (diff > 0) {
2152                         error = xfs_mod_fdblocks(bma->ip->i_mount,
2153                                                  -((int64_t)diff), false);
2154                         ASSERT(!error);
2155                         if (error)
2156                                 goto done;
2157                 }
2158
2159                 ep = xfs_iext_get_ext(ifp, bma->idx);
2160                 xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
2161                 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
2162                 trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2163                 xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
2164                         nullstartblock((int)temp2));
2165                 trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
2166
2167                 bma->idx++;
2168                 da_new = temp + temp2;
2169                 break;
2170
2171         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2172         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2173         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2174         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2175         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2176         case BMAP_LEFT_CONTIG:
2177         case BMAP_RIGHT_CONTIG:
2178                 /*
2179                  * These cases are all impossible.
2180                  */
2181                 ASSERT(0);
2182         }
2183
2184         /* add reverse mapping */
2185         error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
2186         if (error)
2187                 goto done;
2188
2189         /* convert to a btree if necessary */
2190         if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2191                 int     tmp_logflags;   /* partial log flag return val */
2192
2193                 ASSERT(bma->cur == NULL);
2194                 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2195                                 bma->firstblock, bma->dfops, &bma->cur,
2196                                 da_old > 0, &tmp_logflags, whichfork);
2197                 bma->logflags |= tmp_logflags;
2198                 if (error)
2199                         goto done;
2200         }
2201
2202         /* adjust for changes in reserved delayed indirect blocks */
2203         if (da_old || da_new) {
2204                 temp = da_new;
2205                 if (bma->cur)
2206                         temp += bma->cur->bc_private.b.allocated;
2207                 ASSERT(temp <= da_old);
2208                 if (temp < da_old)
2209                         xfs_mod_fdblocks(bma->ip->i_mount,
2210                                         (int64_t)(da_old - temp), false);
2211         }
2212
2213         /* clear out the allocated field, done with it now in any case. */
2214         if (bma->cur)
2215                 bma->cur->bc_private.b.allocated = 0;
2216
2217         xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2218 done:
2219         bma->logflags |= rval;
2220         return error;
2221 #undef  LEFT
2222 #undef  RIGHT
2223 #undef  PREV
2224 }
2225
2226 /*
2227  * Convert an unwritten allocation to a real allocation or vice versa.
2228  */
2229 STATIC int                              /* error */
2230 xfs_bmap_add_extent_unwritten_real(
2231         struct xfs_trans        *tp,
2232         xfs_inode_t             *ip,    /* incore inode pointer */
2233         xfs_extnum_t            *idx,   /* extent number to update/insert */
2234         xfs_btree_cur_t         **curp, /* if *curp is null, not a btree */
2235         xfs_bmbt_irec_t         *new,   /* new data to add to file extents */
2236         xfs_fsblock_t           *first, /* pointer to firstblock variable */
2237         struct xfs_defer_ops    *dfops, /* list of extents to be freed */
2238         int                     *logflagsp) /* inode logging flags */
2239 {
2240         xfs_btree_cur_t         *cur;   /* btree cursor */
2241         xfs_bmbt_rec_host_t     *ep;    /* extent entry for idx */
2242         int                     error;  /* error return value */
2243         int                     i;      /* temp state */
2244         xfs_ifork_t             *ifp;   /* inode fork pointer */
2245         xfs_fileoff_t           new_endoff;     /* end offset of new entry */
2246         xfs_exntst_t            newext; /* new extent state */
2247         xfs_exntst_t            oldext; /* old extent state */
2248         xfs_bmbt_irec_t         r[3];   /* neighbor extent entries */
2249                                         /* left is 0, right is 1, prev is 2 */
2250         int                     rval=0; /* return value (logging flags) */
2251         int                     state = 0;/* state bits, accessed thru macros */
2252         struct xfs_mount        *mp = tp->t_mountp;
2253
2254         *logflagsp = 0;
2255
2256         cur = *curp;
2257         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
2258
2259         ASSERT(*idx >= 0);
2260         ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2261         ASSERT(!isnullstartblock(new->br_startblock));
2262
2263         XFS_STATS_INC(mp, xs_add_exlist);
2264
2265 #define LEFT            r[0]
2266 #define RIGHT           r[1]
2267 #define PREV            r[2]
2268
2269         /*
2270          * Set up a bunch of variables to make the tests simpler.
2271          */
2272         error = 0;
2273         ep = xfs_iext_get_ext(ifp, *idx);
2274         xfs_bmbt_get_all(ep, &PREV);
2275         newext = new->br_state;
2276         oldext = (newext == XFS_EXT_UNWRITTEN) ?
2277                 XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
2278         ASSERT(PREV.br_state == oldext);
2279         new_endoff = new->br_startoff + new->br_blockcount;
2280         ASSERT(PREV.br_startoff <= new->br_startoff);
2281         ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
2282
2283         /*
2284          * Set flags determining what part of the previous oldext allocation
2285          * extent is being replaced by a newext allocation.
2286          */
2287         if (PREV.br_startoff == new->br_startoff)
2288                 state |= BMAP_LEFT_FILLING;
2289         if (PREV.br_startoff + PREV.br_blockcount == new_endoff)
2290                 state |= BMAP_RIGHT_FILLING;
2291
2292         /*
2293          * Check and set flags if this segment has a left neighbor.
2294          * Don't set contiguous if the combined extent would be too large.
2295          */
2296         if (*idx > 0) {
2297                 state |= BMAP_LEFT_VALID;
2298                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
2299
2300                 if (isnullstartblock(LEFT.br_startblock))
2301                         state |= BMAP_LEFT_DELAY;
2302         }
2303
2304         if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
2305             LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
2306             LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
2307             LEFT.br_state == newext &&
2308             LEFT.br_blockcount + new->br_blockcount <= MAXEXTLEN)
2309                 state |= BMAP_LEFT_CONTIG;
2310
2311         /*
2312          * Check and set flags if this segment has a right neighbor.
2313          * Don't set contiguous if the combined extent would be too large.
2314          * Also check for all-three-contiguous being too large.
2315          */
2316         if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
2317                 state |= BMAP_RIGHT_VALID;
2318                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
2319                 if (isnullstartblock(RIGHT.br_startblock))
2320                         state |= BMAP_RIGHT_DELAY;
2321         }
2322
2323         if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
2324             new_endoff == RIGHT.br_startoff &&
2325             new->br_startblock + new->br_blockcount == RIGHT.br_startblock &&
2326             newext == RIGHT.br_state &&
2327             new->br_blockcount + RIGHT.br_blockcount <= MAXEXTLEN &&
2328             ((state & (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2329                        BMAP_RIGHT_FILLING)) !=
2330                       (BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
2331                        BMAP_RIGHT_FILLING) ||
2332              LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2333                         <= MAXEXTLEN))
2334                 state |= BMAP_RIGHT_CONTIG;
2335
2336         /*
2337          * Switch out based on the FILLING and CONTIG state bits.
2338          */
2339         switch (state & (BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2340                          BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG)) {
2341         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG |
2342              BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2343                 /*
2344                  * Setting all of a previous oldext extent to newext.
2345                  * The left and right neighbors are both contiguous with new.
2346                  */
2347                 --*idx;
2348
2349                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2350                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2351                         LEFT.br_blockcount + PREV.br_blockcount +
2352                         RIGHT.br_blockcount);
2353                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2354
2355                 xfs_iext_remove(ip, *idx + 1, 2, state);
2356                 ip->i_d.di_nextents -= 2;
2357                 if (cur == NULL)
2358                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2359                 else {
2360                         rval = XFS_ILOG_CORE;
2361                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2362                                         RIGHT.br_startblock,
2363                                         RIGHT.br_blockcount, &i)))
2364                                 goto done;
2365                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2366                         if ((error = xfs_btree_delete(cur, &i)))
2367                                 goto done;
2368                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2369                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2370                                 goto done;
2371                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2372                         if ((error = xfs_btree_delete(cur, &i)))
2373                                 goto done;
2374                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2375                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2376                                 goto done;
2377                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2378                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2379                                 LEFT.br_startblock,
2380                                 LEFT.br_blockcount + PREV.br_blockcount +
2381                                 RIGHT.br_blockcount, LEFT.br_state)))
2382                                 goto done;
2383                 }
2384                 break;
2385
2386         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2387                 /*
2388                  * Setting all of a previous oldext extent to newext.
2389                  * The left neighbor is contiguous, the right is not.
2390                  */
2391                 --*idx;
2392
2393                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2394                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
2395                         LEFT.br_blockcount + PREV.br_blockcount);
2396                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2397
2398                 xfs_iext_remove(ip, *idx + 1, 1, state);
2399                 ip->i_d.di_nextents--;
2400                 if (cur == NULL)
2401                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2402                 else {
2403                         rval = XFS_ILOG_CORE;
2404                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2405                                         PREV.br_startblock, PREV.br_blockcount,
2406                                         &i)))
2407                                 goto done;
2408                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2409                         if ((error = xfs_btree_delete(cur, &i)))
2410                                 goto done;
2411                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2412                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2413                                 goto done;
2414                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2415                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
2416                                 LEFT.br_startblock,
2417                                 LEFT.br_blockcount + PREV.br_blockcount,
2418                                 LEFT.br_state)))
2419                                 goto done;
2420                 }
2421                 break;
2422
2423         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2424                 /*
2425                  * Setting all of a previous oldext extent to newext.
2426                  * The right neighbor is contiguous, the left is not.
2427                  */
2428                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2429                 xfs_bmbt_set_blockcount(ep,
2430                         PREV.br_blockcount + RIGHT.br_blockcount);
2431                 xfs_bmbt_set_state(ep, newext);
2432                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2433                 xfs_iext_remove(ip, *idx + 1, 1, state);
2434                 ip->i_d.di_nextents--;
2435                 if (cur == NULL)
2436                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2437                 else {
2438                         rval = XFS_ILOG_CORE;
2439                         if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
2440                                         RIGHT.br_startblock,
2441                                         RIGHT.br_blockcount, &i)))
2442                                 goto done;
2443                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2444                         if ((error = xfs_btree_delete(cur, &i)))
2445                                 goto done;
2446                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2447                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2448                                 goto done;
2449                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2450                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2451                                 new->br_startblock,
2452                                 new->br_blockcount + RIGHT.br_blockcount,
2453                                 newext)))
2454                                 goto done;
2455                 }
2456                 break;
2457
2458         case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
2459                 /*
2460                  * Setting all of a previous oldext extent to newext.
2461                  * Neither the left nor right neighbors are contiguous with
2462                  * the new one.
2463                  */
2464                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2465                 xfs_bmbt_set_state(ep, newext);
2466                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2467
2468                 if (cur == NULL)
2469                         rval = XFS_ILOG_DEXT;
2470                 else {
2471                         rval = 0;
2472                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2473                                         new->br_startblock, new->br_blockcount,
2474                                         &i)))
2475                                 goto done;
2476                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2477                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2478                                 new->br_startblock, new->br_blockcount,
2479                                 newext)))
2480                                 goto done;
2481                 }
2482                 break;
2483
2484         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
2485                 /*
2486                  * Setting the first part of a previous oldext extent to newext.
2487                  * The left neighbor is contiguous.
2488                  */
2489                 trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
2490                 xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
2491                         LEFT.br_blockcount + new->br_blockcount);
2492                 xfs_bmbt_set_startoff(ep,
2493                         PREV.br_startoff + new->br_blockcount);
2494                 trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
2495
2496                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2497                 xfs_bmbt_set_startblock(ep,
2498                         new->br_startblock + new->br_blockcount);
2499                 xfs_bmbt_set_blockcount(ep,
2500                         PREV.br_blockcount - new->br_blockcount);
2501                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2502
2503                 --*idx;
2504
2505                 if (cur == NULL)
2506                         rval = XFS_ILOG_DEXT;
2507                 else {
2508                         rval = 0;
2509                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2510                                         PREV.br_startblock, PREV.br_blockcount,
2511                                         &i)))
2512                                 goto done;
2513                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2514                         if ((error = xfs_bmbt_update(cur,
2515                                 PREV.br_startoff + new->br_blockcount,
2516                                 PREV.br_startblock + new->br_blockcount,
2517                                 PREV.br_blockcount - new->br_blockcount,
2518                                 oldext)))
2519                                 goto done;
2520                         if ((error = xfs_btree_decrement(cur, 0, &i)))
2521                                 goto done;
2522                         error = xfs_bmbt_update(cur, LEFT.br_startoff,
2523                                 LEFT.br_startblock,
2524                                 LEFT.br_blockcount + new->br_blockcount,
2525                                 LEFT.br_state);
2526                         if (error)
2527                                 goto done;
2528                 }
2529                 break;
2530
2531         case BMAP_LEFT_FILLING:
2532                 /*
2533                  * Setting the first part of a previous oldext extent to newext.
2534                  * The left neighbor is not contiguous.
2535                  */
2536                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2537                 ASSERT(ep && xfs_bmbt_get_state(ep) == oldext);
2538                 xfs_bmbt_set_startoff(ep, new_endoff);
2539                 xfs_bmbt_set_blockcount(ep,
2540                         PREV.br_blockcount - new->br_blockcount);
2541                 xfs_bmbt_set_startblock(ep,
2542                         new->br_startblock + new->br_blockcount);
2543                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2544
2545                 xfs_iext_insert(ip, *idx, 1, new, state);
2546                 ip->i_d.di_nextents++;
2547                 if (cur == NULL)
2548                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2549                 else {
2550                         rval = XFS_ILOG_CORE;
2551                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2552                                         PREV.br_startblock, PREV.br_blockcount,
2553                                         &i)))
2554                                 goto done;
2555                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2556                         if ((error = xfs_bmbt_update(cur,
2557                                 PREV.br_startoff + new->br_blockcount,
2558                                 PREV.br_startblock + new->br_blockcount,
2559                                 PREV.br_blockcount - new->br_blockcount,
2560                                 oldext)))
2561                                 goto done;
2562                         cur->bc_rec.b = *new;
2563                         if ((error = xfs_btree_insert(cur, &i)))
2564                                 goto done;
2565                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2566                 }
2567                 break;
2568
2569         case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
2570                 /*
2571                  * Setting the last part of a previous oldext extent to newext.
2572                  * The right neighbor is contiguous with the new allocation.
2573                  */
2574                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2575                 xfs_bmbt_set_blockcount(ep,
2576                         PREV.br_blockcount - new->br_blockcount);
2577                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2578
2579                 ++*idx;
2580
2581                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2582                 xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
2583                         new->br_startoff, new->br_startblock,
2584                         new->br_blockcount + RIGHT.br_blockcount, newext);
2585                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2586
2587                 if (cur == NULL)
2588                         rval = XFS_ILOG_DEXT;
2589                 else {
2590                         rval = 0;
2591                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2592                                         PREV.br_startblock,
2593                                         PREV.br_blockcount, &i)))
2594                                 goto done;
2595                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2596                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2597                                 PREV.br_startblock,
2598                                 PREV.br_blockcount - new->br_blockcount,
2599                                 oldext)))
2600                                 goto done;
2601                         if ((error = xfs_btree_increment(cur, 0, &i)))
2602                                 goto done;
2603                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
2604                                 new->br_startblock,
2605                                 new->br_blockcount + RIGHT.br_blockcount,
2606                                 newext)))
2607                                 goto done;
2608                 }
2609                 break;
2610
2611         case BMAP_RIGHT_FILLING:
2612                 /*
2613                  * Setting the last part of a previous oldext extent to newext.
2614                  * The right neighbor is not contiguous.
2615                  */
2616                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2617                 xfs_bmbt_set_blockcount(ep,
2618                         PREV.br_blockcount - new->br_blockcount);
2619                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2620
2621                 ++*idx;
2622                 xfs_iext_insert(ip, *idx, 1, new, state);
2623
2624                 ip->i_d.di_nextents++;
2625                 if (cur == NULL)
2626                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2627                 else {
2628                         rval = XFS_ILOG_CORE;
2629                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2630                                         PREV.br_startblock, PREV.br_blockcount,
2631                                         &i)))
2632                                 goto done;
2633                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2634                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
2635                                 PREV.br_startblock,
2636                                 PREV.br_blockcount - new->br_blockcount,
2637                                 oldext)))
2638                                 goto done;
2639                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2640                                         new->br_startblock, new->br_blockcount,
2641                                         &i)))
2642                                 goto done;
2643                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2644                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
2645                         if ((error = xfs_btree_insert(cur, &i)))
2646                                 goto done;
2647                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2648                 }
2649                 break;
2650
2651         case 0:
2652                 /*
2653                  * Setting the middle part of a previous oldext extent to
2654                  * newext.  Contiguity is impossible here.
2655                  * One extent becomes three extents.
2656                  */
2657                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
2658                 xfs_bmbt_set_blockcount(ep,
2659                         new->br_startoff - PREV.br_startoff);
2660                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2661
2662                 r[0] = *new;
2663                 r[1].br_startoff = new_endoff;
2664                 r[1].br_blockcount =
2665                         PREV.br_startoff + PREV.br_blockcount - new_endoff;
2666                 r[1].br_startblock = new->br_startblock + new->br_blockcount;
2667                 r[1].br_state = oldext;
2668
2669                 ++*idx;
2670                 xfs_iext_insert(ip, *idx, 2, &r[0], state);
2671
2672                 ip->i_d.di_nextents += 2;
2673                 if (cur == NULL)
2674                         rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2675                 else {
2676                         rval = XFS_ILOG_CORE;
2677                         if ((error = xfs_bmbt_lookup_eq(cur, PREV.br_startoff,
2678                                         PREV.br_startblock, PREV.br_blockcount,
2679                                         &i)))
2680                                 goto done;
2681                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2682                         /* new right extent - oldext */
2683                         if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
2684                                 r[1].br_startblock, r[1].br_blockcount,
2685                                 r[1].br_state)))
2686                                 goto done;
2687                         /* new left extent - oldext */
2688                         cur->bc_rec.b = PREV;
2689                         cur->bc_rec.b.br_blockcount =
2690                                 new->br_startoff - PREV.br_startoff;
2691                         if ((error = xfs_btree_insert(cur, &i)))
2692                                 goto done;
2693                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2694                         /*
2695                          * Reset the cursor to the position of the new extent
2696                          * we are about to insert as we can't trust it after
2697                          * the previous insert.
2698                          */
2699                         if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
2700                                         new->br_startblock, new->br_blockcount,
2701                                         &i)))
2702                                 goto done;
2703                         XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
2704                         /* new middle extent - newext */
2705                         cur->bc_rec.b.br_state = new->br_state;
2706                         if ((error = xfs_btree_insert(cur, &i)))
2707                                 goto done;
2708                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2709                 }
2710                 break;
2711
2712         case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2713         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2714         case BMAP_LEFT_FILLING | BMAP_RIGHT_CONTIG:
2715         case BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
2716         case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
2717         case BMAP_LEFT_CONTIG:
2718         case BMAP_RIGHT_CONTIG:
2719                 /*
2720                  * These cases are all impossible.
2721                  */
2722                 ASSERT(0);
2723         }
2724
2725         /* update reverse mappings */
2726         error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new);
2727         if (error)
2728                 goto done;
2729
2730         /* convert to a btree if necessary */
2731         if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) {
2732                 int     tmp_logflags;   /* partial log flag return val */
2733
2734                 ASSERT(cur == NULL);
2735                 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
2736                                 0, &tmp_logflags, XFS_DATA_FORK);
2737                 *logflagsp |= tmp_logflags;
2738                 if (error)
2739                         goto done;
2740         }
2741
2742         /* clear out the allocated field, done with it now in any case. */
2743         if (cur) {
2744                 cur->bc_private.b.allocated = 0;
2745                 *curp = cur;
2746         }
2747
2748         xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
2749 done:
2750         *logflagsp |= rval;
2751         return error;
2752 #undef  LEFT
2753 #undef  RIGHT
2754 #undef  PREV
2755 }
2756
/*
 * Convert a hole to a delayed allocation.
 *
 * Insert the delalloc extent described by @new into the in-core data fork
 * at index *idx, merging it with an adjacent delalloc extent on the left
 * and/or right when the pieces are contiguous and the combined length
 * stays within MAXEXTLEN.  Merging shrinks the worst-case indirect-block
 * reservation (encoded in br_startblock via nullstartblock), so any
 * surplus reservation is returned to the free-block counter at the end.
 * This touches only the in-core extent list: no transaction or btree
 * cursor is involved, hence no error return.
 */
STATIC void
xfs_bmap_add_extent_hole_delay(
	xfs_inode_t		*ip,	/* incore inode pointer */
	xfs_extnum_t		*idx,	/* extent number to update/insert */
	xfs_bmbt_irec_t		*new)	/* new data to add to file extents */
{
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
	xfs_filblks_t		newlen=0;	/* new indirect size */
	xfs_filblks_t		oldlen=0;	/* old indirect size */
	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
	int			state;	/* state bits, accessed thru macros */
	xfs_filblks_t		temp=0;	/* temp for indirect calculations */

	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	state = 0;
	/* delalloc extents carry the indlen reservation in br_startblock */
	ASSERT(isnullstartblock(new->br_startblock));

	/*
	 * Check and set flags if this segment has a left neighbor
	 */
	if (*idx > 0) {
		state |= BMAP_LEFT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);

		if (isnullstartblock(left.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	/*
	 * Check and set flags if the current (right) segment exists.
	 * If it doesn't exist, we're converting the hole at end-of-file.
	 */
	if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
		state |= BMAP_RIGHT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);

		if (isnullstartblock(right.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	/*
	 * Set contiguity flags on the left and right neighbors.
	 * Don't let extents get too large, even if the pieces are contiguous.
	 */
	if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
	    left.br_startoff + left.br_blockcount == new->br_startoff &&
	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
	    new->br_startoff + new->br_blockcount == right.br_startoff &&
	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
	    (!(state & BMAP_LEFT_CONTIG) ||
	     (left.br_blockcount + new->br_blockcount +
	      right.br_blockcount <= MAXEXTLEN)))
		state |= BMAP_RIGHT_CONTIG;

	/*
	 * Switch out based on the contiguity flags.
	 */
	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with delayed allocations
		 * on the left and on the right.
		 * Merge all three into a single extent record.
		 */
		--*idx;
		temp = left.br_blockcount + new->br_blockcount +
			right.br_blockcount;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
		/* recompute the indlen reservation for the merged extent */
		oldlen = startblockval(left.br_startblock) +
			startblockval(new->br_startblock) +
			startblockval(right.br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
			nullstartblock((int)newlen));
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		/* right neighbor was absorbed into the left record */
		xfs_iext_remove(ip, *idx + 1, 1, state);
		break;

	case BMAP_LEFT_CONTIG:
		/*
		 * New allocation is contiguous with a delayed allocation
		 * on the left.
		 * Merge the new allocation with the left neighbor.
		 */
		--*idx;
		temp = left.br_blockcount + new->br_blockcount;

		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx), temp);
		oldlen = startblockval(left.br_startblock) +
			startblockval(new->br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx),
			nullstartblock((int)newlen));
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		break;

	case BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with a delayed allocation
		 * on the right.
		 * Merge the new allocation with the right neighbor.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		temp = new->br_blockcount + right.br_blockcount;
		oldlen = startblockval(new->br_startblock) +
			startblockval(right.br_startblock);
		newlen = xfs_bmap_worst_indlen(ip, temp);
		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
			new->br_startoff,
			nullstartblock((int)newlen), temp, right.br_state);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		break;

	case 0:
		/*
		 * New allocation is not contiguous with another
		 * delayed allocation.
		 * Insert a new entry.
		 */
		oldlen = newlen = 0;
		xfs_iext_insert(ip, *idx, 1, new, state);
		break;
	}
	if (oldlen != newlen) {
		/* merging can only ever shrink the reservation */
		ASSERT(oldlen > newlen);
		/* give the excess indlen reservation back to free space */
		xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
				 false);
		/*
		 * Nothing to do for disk quota accounting here.
		 */
	}
}
2900
/*
 * Convert a hole to a real allocation.
 *
 * Insert the real extent bma->got into the in-core extent list of
 * @whichfork at index bma->idx, merging it with a contiguous real
 * neighbor on the left and/or right where possible.  When a bmap btree
 * cursor exists (bma->cur), the corresponding btree records are updated
 * in lockstep with the in-core list.  A reverse-mapping update is queued
 * via bma->dfops, and the fork is converted from extents to btree format
 * if the insert pushed it past the inline extent limit.  Inode logging
 * flags are OR'd into bma->logflags; returns 0 or a negative errno.
 */
STATIC int				/* error */
xfs_bmap_add_extent_hole_real(
	struct xfs_bmalloca	*bma,
	int			whichfork)
{
	struct xfs_bmbt_irec	*new = &bma->got;
	int			error;	/* error return value */
	int			i;	/* temp state */
	xfs_ifork_t		*ifp;	/* inode fork pointer */
	xfs_bmbt_irec_t		left;	/* left neighbor extent entry */
	xfs_bmbt_irec_t		right;	/* right neighbor extent entry */
	int			rval=0;	/* return value (logging flags) */
	int			state;	/* state bits, accessed thru macros */
	struct xfs_mount	*mp;

	mp = bma->ip->i_mount;
	ifp = XFS_IFORK_PTR(bma->ip, whichfork);

	ASSERT(bma->idx >= 0);
	ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
	/* a real allocation, not delalloc */
	ASSERT(!isnullstartblock(new->br_startblock));
	ASSERT(!bma->cur ||
	       !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));

	XFS_STATS_INC(mp, xs_add_exlist);

	state = 0;
	if (whichfork == XFS_ATTR_FORK)
		state |= BMAP_ATTRFORK;

	/*
	 * Check and set flags if this segment has a left neighbor.
	 */
	if (bma->idx > 0) {
		state |= BMAP_LEFT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
		if (isnullstartblock(left.br_startblock))
			state |= BMAP_LEFT_DELAY;
	}

	/*
	 * Check and set flags if this segment has a current value.
	 * Not true if we're inserting into the "hole" at eof.
	 */
	if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
		state |= BMAP_RIGHT_VALID;
		xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
		if (isnullstartblock(right.br_startblock))
			state |= BMAP_RIGHT_DELAY;
	}

	/*
	 * We're inserting a real allocation between "left" and "right".
	 * Set the contiguity flags.  Don't let extents get too large.
	 */
	if ((state & BMAP_LEFT_VALID) && !(state & BMAP_LEFT_DELAY) &&
	    left.br_startoff + left.br_blockcount == new->br_startoff &&
	    left.br_startblock + left.br_blockcount == new->br_startblock &&
	    left.br_state == new->br_state &&
	    left.br_blockcount + new->br_blockcount <= MAXEXTLEN)
		state |= BMAP_LEFT_CONTIG;

	if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
	    new->br_startoff + new->br_blockcount == right.br_startoff &&
	    new->br_startblock + new->br_blockcount == right.br_startblock &&
	    new->br_state == right.br_state &&
	    new->br_blockcount + right.br_blockcount <= MAXEXTLEN &&
	    (!(state & BMAP_LEFT_CONTIG) ||
	     left.br_blockcount + new->br_blockcount +
	     right.br_blockcount <= MAXEXTLEN))
		state |= BMAP_RIGHT_CONTIG;

	error = 0;
	/*
	 * Select which case we're in here, and implement it.
	 */
	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with real allocations on the
		 * left and on the right.
		 * Merge all three into a single extent record.
		 */
		--bma->idx;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
			left.br_blockcount + new->br_blockcount +
			right.br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		/* right neighbor absorbed: drop its in-core record */
		xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);

		XFS_IFORK_NEXT_SET(bma->ip, whichfork,
			XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
		if (bma->cur == NULL) {
			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
		} else {
			rval = XFS_ILOG_CORE;
			/* mirror the merge in the btree: delete right, ... */
			error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
					right.br_startblock, right.br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_delete(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_btree_decrement(bma->cur, 0, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			/* ... then grow left to cover all three */
			error = xfs_bmbt_update(bma->cur, left.br_startoff,
					left.br_startblock,
					left.br_blockcount +
						new->br_blockcount +
						right.br_blockcount,
					left.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_LEFT_CONTIG:
		/*
		 * New allocation is contiguous with a real allocation
		 * on the left.
		 * Merge the new allocation with the left neighbor.
		 */
		--bma->idx;
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
			left.br_blockcount + new->br_blockcount);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		if (bma->cur == NULL) {
			/* extent count unchanged: only the fork needs logging */
			rval = xfs_ilog_fext(whichfork);
		} else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
					left.br_startblock, left.br_blockcount,
					&i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, left.br_startoff,
					left.br_startblock,
					left.br_blockcount +
						new->br_blockcount,
					left.br_state);
			if (error)
				goto done;
		}
		break;

	case BMAP_RIGHT_CONTIG:
		/*
		 * New allocation is contiguous with a real allocation
		 * on the right.
		 * Merge the new allocation with the right neighbor.
		 */
		trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
		xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
			new->br_startoff, new->br_startblock,
			new->br_blockcount + right.br_blockcount,
			right.br_state);
		trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);

		if (bma->cur == NULL) {
			rval = xfs_ilog_fext(whichfork);
		} else {
			rval = 0;
			error = xfs_bmbt_lookup_eq(bma->cur,
					right.br_startoff,
					right.br_startblock,
					right.br_blockcount, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
			error = xfs_bmbt_update(bma->cur, new->br_startoff,
					new->br_startblock,
					new->br_blockcount +
						right.br_blockcount,
					right.br_state);
			if (error)
				goto done;
		}
		break;

	case 0:
		/*
		 * New allocation is not contiguous with another
		 * real allocation.
		 * Insert a new entry.
		 */
		xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
		XFS_IFORK_NEXT_SET(bma->ip, whichfork,
			XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
		if (bma->cur == NULL) {
			rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
		} else {
			rval = XFS_ILOG_CORE;
			error = xfs_bmbt_lookup_eq(bma->cur,
					new->br_startoff,
					new->br_startblock,
					new->br_blockcount, &i);
			if (error)
				goto done;
			/* record must not already exist */
			XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
			bma->cur->bc_rec.b.br_state = new->br_state;
			error = xfs_btree_insert(bma->cur, &i);
			if (error)
				goto done;
			XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
		}
		break;
	}

	/* add reverse mapping */
	error = xfs_rmap_map_extent(mp, bma->dfops, bma->ip, whichfork, new);
	if (error)
		goto done;

	/* convert to a btree if necessary */
	if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
		int	tmp_logflags;	/* partial log flag return val */

		ASSERT(bma->cur == NULL);
		error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
				bma->firstblock, bma->dfops, &bma->cur,
				0, &tmp_logflags, whichfork);
		bma->logflags |= tmp_logflags;
		if (error)
			goto done;
	}

	/* clear out the allocated field, done with it now in any case. */
	if (bma->cur)
		bma->cur->bc_private.b.allocated = 0;

	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
	bma->logflags |= rval;
	return error;
}
3149
3150 /*
3151  * Functions used in the extent read, allocate and remove paths
3152  */
3153
/*
 * Adjust the size of the new extent based on di_extsize and rt extsize.
 *
 * Expand and/or shift the requested range (*offp, *lenp) so that both
 * ends are aligned to @extsz blocks, while never overlapping the
 * neighboring extents @prevp (left) and @gotp (right), and capping the
 * length at MAXEXTLEN.  For realtime inodes (@rt) the final length is
 * additionally trimmed to a multiple of the rt extent size.  The aligned
 * range must still cover the original request; if that cannot be
 * achieved, -EINVAL is returned.  No-op (returns 0 unchanged) when
 * @convert is set or the request already lies inside an existing extent.
 * On success *offp/*lenp are updated in place and 0 is returned.
 */
int
xfs_bmap_extsize_align(
	xfs_mount_t	*mp,
	xfs_bmbt_irec_t	*gotp,		/* next extent pointer */
	xfs_bmbt_irec_t	*prevp,		/* previous extent pointer */
	xfs_extlen_t	extsz,		/* align to this extent size */
	int		rt,		/* is this a realtime inode? */
	int		eof,		/* is extent at end-of-file? */
	int		delay,		/* creating delalloc extent? */
	int		convert,	/* overwriting unwritten extent? */
	xfs_fileoff_t	*offp,		/* in/out: aligned offset */
	xfs_extlen_t	*lenp)		/* in/out: aligned length */
{
	xfs_fileoff_t	orig_off;	/* original offset */
	xfs_extlen_t	orig_alen;	/* original length */
	xfs_fileoff_t	orig_end;	/* original off+len */
	xfs_fileoff_t	nexto;		/* next file offset */
	xfs_fileoff_t	prevo;		/* previous file offset */
	xfs_fileoff_t	align_off;	/* temp for offset */
	xfs_extlen_t	align_alen;	/* temp for length */
	xfs_extlen_t	temp;		/* temp for calculations */

	if (convert)
		return 0;

	orig_off = align_off = *offp;
	orig_alen = align_alen = *lenp;
	orig_end = orig_off + orig_alen;

	/*
	 * If this request overlaps an existing extent, then don't
	 * attempt to perform any additional alignment.
	 */
	if (!delay && !eof &&
	    (orig_off >= gotp->br_startoff) &&
	    (orig_end <= gotp->br_startoff + gotp->br_blockcount)) {
		return 0;
	}

	/*
	 * If the file offset is unaligned vs. the extent size
	 * we need to align it.  This will be possible unless
	 * the file was previously written with a kernel that didn't
	 * perform this alignment, or if a truncate shot us in the
	 * foot.
	 */
	temp = do_mod(orig_off, extsz);
	if (temp) {
		/* pull the start back to the alignment boundary, grow len */
		align_alen += temp;
		align_off -= temp;
	}

	/* Same adjustment for the end of the requested area. */
	temp = (align_alen % extsz);
	if (temp)
		align_alen += extsz - temp;

	/*
	 * For large extent hint sizes, the aligned extent might be larger than
	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
	 * the length back under MAXEXTLEN. The outer allocation loops handle
	 * short allocation just fine, so it is safe to do this. We only want to
	 * do it when we are forced to, though, because it means more allocation
	 * operations are required.
	 */
	while (align_alen > MAXEXTLEN)
		align_alen -= extsz;
	ASSERT(align_alen <= MAXEXTLEN);

	/*
	 * If the previous block overlaps with this proposed allocation
	 * then move the start forward without adjusting the length.
	 */
	if (prevp->br_startoff != NULLFILEOFF) {
		if (prevp->br_startblock == HOLESTARTBLOCK)
			prevo = prevp->br_startoff;
		else
			prevo = prevp->br_startoff + prevp->br_blockcount;
	} else
		prevo = 0;
	if (align_off != orig_off && align_off < prevo)
		align_off = prevo;
	/*
	 * If the next block overlaps with this proposed allocation
	 * then move the start back without adjusting the length,
	 * but not before offset 0.
	 * This may of course make the start overlap previous block,
	 * and if we hit the offset 0 limit then the next block
	 * can still overlap too.
	 */
	if (!eof && gotp->br_startoff != NULLFILEOFF) {
		if ((delay && gotp->br_startblock == HOLESTARTBLOCK) ||
		    (!delay && gotp->br_startblock == DELAYSTARTBLOCK))
			nexto = gotp->br_startoff + gotp->br_blockcount;
		else
			nexto = gotp->br_startoff;
	} else
		nexto = NULLFILEOFF;
	if (!eof &&
	    align_off + align_alen != orig_end &&
	    align_off + align_alen > nexto)
		align_off = nexto > align_alen ? nexto - align_alen : 0;
	/*
	 * If we're now overlapping the next or previous extent that
	 * means we can't fit an extsz piece in this hole.  Just move
	 * the start forward to the first valid spot and set
	 * the length so we hit the end.
	 */
	if (align_off != orig_off && align_off < prevo)
		align_off = prevo;
	if (align_off + align_alen != orig_end &&
	    align_off + align_alen > nexto &&
	    nexto != NULLFILEOFF) {
		ASSERT(nexto > prevo);
		align_alen = nexto - align_off;
	}

	/*
	 * If realtime, and the result isn't a multiple of the realtime
	 * extent size we need to remove blocks until it is.
	 */
	if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
		/*
		 * We're not covering the original request, or
		 * we won't be able to once we fix the length.
		 */
		if (orig_off < align_off ||
		    orig_end > align_off + align_alen ||
		    align_alen - temp < orig_alen)
			return -EINVAL;
		/*
		 * Try to fix it by moving the start up.
		 */
		if (align_off + temp <= orig_off) {
			align_alen -= temp;
			align_off += temp;
		}
		/*
		 * Try to fix it by moving the end in.
		 */
		else if (align_off + align_alen - temp >= orig_end)
			align_alen -= temp;
		/*
		 * Set the start to the minimum then trim the length.
		 */
		else {
			align_alen -= orig_off - align_off;
			align_off = orig_off;
			align_alen -= align_alen % mp->m_sb.sb_rextsize;
		}
		/*
		 * Result doesn't cover the request, fail it.
		 */
		if (orig_off < align_off || orig_end > align_off + align_alen)
			return -EINVAL;
	} else {
		ASSERT(orig_off >= align_off);
		/* see MAXEXTLEN handling above */
		ASSERT(orig_end <= align_off + align_alen ||
		       align_alen + extsz > MAXEXTLEN);
	}

#ifdef DEBUG
	if (!eof && gotp->br_startoff != NULLFILEOFF)
		ASSERT(align_off + align_alen <= gotp->br_startoff);
	if (prevp->br_startoff != NULLFILEOFF)
		ASSERT(align_off >= prevp->br_startoff + prevp->br_blockcount);
#endif

	*lenp = align_alen;
	*offp = align_off;
	return 0;
}
3330
3331 #define XFS_ALLOC_GAP_UNITS     4
3332
3333 void
3334 xfs_bmap_adjacent(
3335         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3336 {
3337         xfs_fsblock_t   adjust;         /* adjustment to block numbers */
3338         xfs_agnumber_t  fb_agno;        /* ag number of ap->firstblock */
3339         xfs_mount_t     *mp;            /* mount point structure */
3340         int             nullfb;         /* true if ap->firstblock isn't set */
3341         int             rt;             /* true if inode is realtime */
3342
3343 #define ISVALID(x,y)    \
3344         (rt ? \
3345                 (x) < mp->m_sb.sb_rblocks : \
3346                 XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) && \
3347                 XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount && \
3348                 XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
3349
3350         mp = ap->ip->i_mount;
3351         nullfb = *ap->firstblock == NULLFSBLOCK;
3352         rt = XFS_IS_REALTIME_INODE(ap->ip) &&
3353                 xfs_alloc_is_userdata(ap->datatype);
3354         fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
3355         /*
3356          * If allocating at eof, and there's a previous real block,
3357          * try to use its last block as our starting point.
3358          */
3359         if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
3360             !isnullstartblock(ap->prev.br_startblock) &&
3361             ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
3362                     ap->prev.br_startblock)) {
3363                 ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
3364                 /*
3365                  * Adjust for the gap between prevp and us.
3366                  */
3367                 adjust = ap->offset -
3368                         (ap->prev.br_startoff + ap->prev.br_blockcount);
3369                 if (adjust &&
3370                     ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
3371                         ap->blkno += adjust;
3372         }
3373         /*
3374          * If not at eof, then compare the two neighbor blocks.
3375          * Figure out whether either one gives us a good starting point,
3376          * and pick the better one.
3377          */
3378         else if (!ap->eof) {
3379                 xfs_fsblock_t   gotbno;         /* right side block number */
3380                 xfs_fsblock_t   gotdiff=0;      /* right side difference */
3381                 xfs_fsblock_t   prevbno;        /* left side block number */
3382                 xfs_fsblock_t   prevdiff=0;     /* left side difference */
3383
3384                 /*
3385                  * If there's a previous (left) block, select a requested
3386                  * start block based on it.
3387                  */
3388                 if (ap->prev.br_startoff != NULLFILEOFF &&
3389                     !isnullstartblock(ap->prev.br_startblock) &&
3390                     (prevbno = ap->prev.br_startblock +
3391                                ap->prev.br_blockcount) &&
3392                     ISVALID(prevbno, ap->prev.br_startblock)) {
3393                         /*
3394                          * Calculate gap to end of previous block.
3395                          */
3396                         adjust = prevdiff = ap->offset -
3397                                 (ap->prev.br_startoff +
3398                                  ap->prev.br_blockcount);
3399                         /*
3400                          * Figure the startblock based on the previous block's
3401                          * end and the gap size.
3402                          * Heuristic!
3403                          * If the gap is large relative to the piece we're
3404                          * allocating, or using it gives us an invalid block
3405                          * number, then just use the end of the previous block.
3406                          */
3407                         if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3408                             ISVALID(prevbno + prevdiff,
3409                                     ap->prev.br_startblock))
3410                                 prevbno += adjust;
3411                         else
3412                                 prevdiff += adjust;
3413                         /*
3414                          * If the firstblock forbids it, can't use it,
3415                          * must use default.
3416                          */
3417                         if (!rt && !nullfb &&
3418                             XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno)
3419                                 prevbno = NULLFSBLOCK;
3420                 }
3421                 /*
3422                  * No previous block or can't follow it, just default.
3423                  */
3424                 else
3425                         prevbno = NULLFSBLOCK;
3426                 /*
3427                  * If there's a following (right) block, select a requested
3428                  * start block based on it.
3429                  */
3430                 if (!isnullstartblock(ap->got.br_startblock)) {
3431                         /*
3432                          * Calculate gap to start of next block.
3433                          */
3434                         adjust = gotdiff = ap->got.br_startoff - ap->offset;
3435                         /*
3436                          * Figure the startblock based on the next block's
3437                          * start and the gap size.
3438                          */
3439                         gotbno = ap->got.br_startblock;
3440                         /*
3441                          * Heuristic!
3442                          * If the gap is large relative to the piece we're
3443                          * allocating, or using it gives us an invalid block
3444                          * number, then just use the start of the next block
3445                          * offset by our length.
3446                          */
3447                         if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
3448                             ISVALID(gotbno - gotdiff, gotbno))
3449                                 gotbno -= adjust;
3450                         else if (ISVALID(gotbno - ap->length, gotbno)) {
3451                                 gotbno -= ap->length;
3452                                 gotdiff += adjust - ap->length;
3453                         } else
3454                                 gotdiff += adjust;
3455                         /*
3456                          * If the firstblock forbids it, can't use it,
3457                          * must use default.
3458                          */
3459                         if (!rt && !nullfb &&
3460                             XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno)
3461                                 gotbno = NULLFSBLOCK;
3462                 }
3463                 /*
3464                  * No next block, just default.
3465                  */
3466                 else
3467                         gotbno = NULLFSBLOCK;
3468                 /*
3469                  * If both valid, pick the better one, else the only good
3470                  * one, else ap->blkno is already set (to 0 or the inode block).
3471                  */
3472                 if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
3473                         ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
3474                 else if (prevbno != NULLFSBLOCK)
3475                         ap->blkno = prevbno;
3476                 else if (gotbno != NULLFSBLOCK)
3477                         ap->blkno = gotbno;
3478         }
3479 #undef ISVALID
3480 }
3481
3482 static int
3483 xfs_bmap_longest_free_extent(
3484         struct xfs_trans        *tp,
3485         xfs_agnumber_t          ag,
3486         xfs_extlen_t            *blen,
3487         int                     *notinit)
3488 {
3489         struct xfs_mount        *mp = tp->t_mountp;
3490         struct xfs_perag        *pag;
3491         xfs_extlen_t            longest;
3492         int                     error = 0;
3493
3494         pag = xfs_perag_get(mp, ag);
3495         if (!pag->pagf_init) {
3496                 error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
3497                 if (error)
3498                         goto out;
3499
3500                 if (!pag->pagf_init) {
3501                         *notinit = 1;
3502                         goto out;
3503                 }
3504         }
3505
3506         longest = xfs_alloc_longest_free_extent(mp, pag,
3507                                 xfs_alloc_min_freelist(mp, pag),
3508                                 xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
3509         if (*blen < longest)
3510                 *blen = longest;
3511
3512 out:
3513         xfs_perag_put(pag);
3514         return error;
3515 }
3516
3517 static void
3518 xfs_bmap_select_minlen(
3519         struct xfs_bmalloca     *ap,
3520         struct xfs_alloc_arg    *args,
3521         xfs_extlen_t            *blen,
3522         int                     notinit)
3523 {
3524         if (notinit || *blen < ap->minlen) {
3525                 /*
3526                  * Since we did a BUF_TRYLOCK above, it is possible that
3527                  * there is space for this request.
3528                  */
3529                 args->minlen = ap->minlen;
3530         } else if (*blen < args->maxlen) {
3531                 /*
3532                  * If the best seen length is less than the request length,
3533                  * use the best as the minimum.
3534                  */
3535                 args->minlen = *blen;
3536         } else {
3537                 /*
3538                  * Otherwise we've seen an extent as big as maxlen, use that
3539                  * as the minimum.
3540                  */
3541                 args->minlen = args->maxlen;
3542         }
3543 }
3544
3545 STATIC int
3546 xfs_bmap_btalloc_nullfb(
3547         struct xfs_bmalloca     *ap,
3548         struct xfs_alloc_arg    *args,
3549         xfs_extlen_t            *blen)
3550 {
3551         struct xfs_mount        *mp = ap->ip->i_mount;
3552         xfs_agnumber_t          ag, startag;
3553         int                     notinit = 0;
3554         int                     error;
3555
3556         args->type = XFS_ALLOCTYPE_START_BNO;
3557         args->total = ap->total;
3558
3559         startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3560         if (startag == NULLAGNUMBER)
3561                 startag = ag = 0;
3562
3563         while (*blen < args->maxlen) {
3564                 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3565                                                      &notinit);
3566                 if (error)
3567                         return error;
3568
3569                 if (++ag == mp->m_sb.sb_agcount)
3570                         ag = 0;
3571                 if (ag == startag)
3572                         break;
3573         }
3574
3575         xfs_bmap_select_minlen(ap, args, blen, notinit);
3576         return 0;
3577 }
3578
3579 STATIC int
3580 xfs_bmap_btalloc_filestreams(
3581         struct xfs_bmalloca     *ap,
3582         struct xfs_alloc_arg    *args,
3583         xfs_extlen_t            *blen)
3584 {
3585         struct xfs_mount        *mp = ap->ip->i_mount;
3586         xfs_agnumber_t          ag;
3587         int                     notinit = 0;
3588         int                     error;
3589
3590         args->type = XFS_ALLOCTYPE_NEAR_BNO;
3591         args->total = ap->total;
3592
3593         ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
3594         if (ag == NULLAGNUMBER)
3595                 ag = 0;
3596
3597         error = xfs_bmap_longest_free_extent(args->tp, ag, blen, &notinit);
3598         if (error)
3599                 return error;
3600
3601         if (*blen < args->maxlen) {
3602                 error = xfs_filestream_new_ag(ap, &ag);
3603                 if (error)
3604                         return error;
3605
3606                 error = xfs_bmap_longest_free_extent(args->tp, ag, blen,
3607                                                      &notinit);
3608                 if (error)
3609                         return error;
3610
3611         }
3612
3613         xfs_bmap_select_minlen(ap, args, blen, notinit);
3614
3615         /*
3616          * Set the failure fallback case to look in the selected AG as stream
3617          * may have moved.
3618          */
3619         ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
3620         return 0;
3621 }
3622
/*
 * Allocate a data extent for ap using the by-block allocator
 * (xfs_alloc_vextent).  Picks a target block number (preferring
 * adjacency to neighbouring extents and, when appropriate, stripe
 * alignment), then walks through a series of progressively less
 * restrictive allocation attempts until one succeeds or all fail.
 * On success ap->blkno/ap->length describe the allocation and the
 * inode block count and quota reservations are updated; if every
 * attempt fails, ap->blkno is set to NULLFSBLOCK with ap->length 0
 * and 0 is still returned.  A negative errno is returned only for a
 * real error from the underlying allocator.
 */
STATIC int
xfs_bmap_btalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_mount_t	*mp;		/* mount point structure */
	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
	xfs_extlen_t	align = 0;	/* minimum allocation alignment */
	xfs_agnumber_t	fb_agno;	/* ag number of ap->firstblock */
	xfs_agnumber_t	ag;
	xfs_alloc_arg_t	args;
	xfs_extlen_t	blen;
	xfs_extlen_t	nextminlen = 0;
	int		nullfb;		/* true if ap->firstblock isn't set */
	int		isaligned;
	int		tryagain;
	int		error;
	int		stripe_align;

	ASSERT(ap->length);

	mp = ap->ip->i_mount;

	/* stripe alignment for allocation is determined by mount parameters */
	stripe_align = 0;
	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
		stripe_align = mp->m_swidth;
	else if (mp->m_dalign)
		stripe_align = mp->m_dalign;

	/* Round the request out to any extent size hint on the inode. */
	if (xfs_alloc_is_userdata(ap->datatype))
		align = xfs_get_extsz_hint(ap->ip);
	if (unlikely(align)) {
		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
						align, 0, ap->eof, 0, ap->conv,
						&ap->offset, &ap->length);
		ASSERT(!error);
		ASSERT(ap->length);
	}


	nullfb = *ap->firstblock == NULLFSBLOCK;
	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
	if (nullfb) {
		/*
		 * No earlier allocation in this transaction: aim at the
		 * stream's AG for filestream user data, otherwise near the
		 * inode itself.
		 */
		if (xfs_alloc_is_userdata(ap->datatype) &&
		    xfs_inode_is_filestream(ap->ip)) {
			ag = xfs_filestream_lookup_ag(ap->ip);
			ag = (ag != NULLAGNUMBER) ? ag : 0;
			ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
		} else {
			ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
		}
	} else
		ap->blkno = *ap->firstblock;

	xfs_bmap_adjacent(ap);

	/*
	 * If allowed, use ap->blkno; otherwise must use firstblock since
	 * it's in the right allocation group.
	 */
	if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
		;
	else
		ap->blkno = *ap->firstblock;
	/*
	 * Normal allocation, done through xfs_alloc_vextent.
	 */
	tryagain = isaligned = 0;
	memset(&args, 0, sizeof(args));
	args.tp = ap->tp;
	args.mp = mp;
	args.fsbno = ap->blkno;
	xfs_rmap_skip_owner_update(&args.oinfo);

	/* Trim the allocation back to the maximum an AG can fit. */
	args.maxlen = MIN(ap->length, mp->m_ag_max_usable);
	args.firstblock = *ap->firstblock;
	blen = 0;
	if (nullfb) {
		/*
		 * Search for an allocation group with a single extent large
		 * enough for the request.  If one isn't found, then adjust
		 * the minimum allocation size to the largest space found.
		 */
		if (xfs_alloc_is_userdata(ap->datatype) &&
		    xfs_inode_is_filestream(ap->ip))
			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
		else
			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
		if (error)
			return error;
	} else if (ap->dfops->dop_low) {
		/* Low on space: take what we can get, starting low. */
		if (xfs_inode_is_filestream(ap->ip))
			args.type = XFS_ALLOCTYPE_FIRST_AG;
		else
			args.type = XFS_ALLOCTYPE_START_BNO;
		args.total = args.minlen = ap->minlen;
	} else {
		args.type = XFS_ALLOCTYPE_NEAR_BNO;
		args.total = ap->total;
		args.minlen = ap->minlen;
	}
	/* apply extent size hints if obtained earlier */
	if (unlikely(align)) {
		args.prod = align;
		if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
			args.mod = (xfs_extlen_t)(args.prod - args.mod);
	} else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
		args.prod = 1;
		args.mod = 0;
	} else {
		args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
		if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
			args.mod = (xfs_extlen_t)(args.prod - args.mod);
	}
	/*
	 * If we are not low on available data blocks, and the
	 * underlying logical volume manager is a stripe, and
	 * the file offset is zero then try to allocate data
	 * blocks on stripe unit boundary.
	 * NOTE: ap->aeof is only set if the allocation length
	 * is >= the stripe unit and the allocation offset is
	 * at the end of file.
	 */
	if (!ap->dfops->dop_low && ap->aeof) {
		if (!ap->offset) {
			args.alignment = stripe_align;
			atype = args.type;
			isaligned = 1;
			/*
			 * Adjust for alignment
			 */
			if (blen > args.alignment && blen <= args.maxlen)
				args.minlen = blen - args.alignment;
			args.minalignslop = 0;
		} else {
			/*
			 * First try an exact bno allocation.
			 * If it fails then do a near or start bno
			 * allocation with alignment turned on.
			 */
			atype = args.type;
			tryagain = 1;
			args.type = XFS_ALLOCTYPE_THIS_BNO;
			args.alignment = 1;
			/*
			 * Compute the minlen+alignment for the
			 * next case.  Set slop so that the value
			 * of minlen+alignment+slop doesn't go up
			 * between the calls.
			 */
			if (blen > stripe_align && blen <= args.maxlen)
				nextminlen = blen - stripe_align;
			else
				nextminlen = args.minlen;
			if (nextminlen + stripe_align > args.minlen + 1)
				args.minalignslop =
					nextminlen + stripe_align -
					args.minlen - 1;
			else
				args.minalignslop = 0;
		}
	} else {
		args.alignment = 1;
		args.minalignslop = 0;
	}
	args.minleft = ap->minleft;
	args.wasdel = ap->wasdel;
	args.resv = XFS_AG_RESV_NONE;
	args.datatype = ap->datatype;
	if (ap->datatype & XFS_ALLOC_USERDATA_ZERO)
		args.ip = ap->ip;

	/* First allocation attempt with the parameters chosen above. */
	error = xfs_alloc_vextent(&args);
	if (error)
		return error;

	if (tryagain && args.fsbno == NULLFSBLOCK) {
		/*
		 * Exact allocation failed. Now try with alignment
		 * turned on.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = stripe_align;
		args.minlen = nextminlen;
		args.minalignslop = 0;
		isaligned = 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		/*
		 * allocation failed, so turn off alignment and
		 * try again.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = 0;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (args.fsbno == NULLFSBLOCK && nullfb &&
	    args.minlen > ap->minlen) {
		/* Retry with the caller's minimum length. */
		args.minlen = ap->minlen;
		args.type = XFS_ALLOCTYPE_START_BNO;
		args.fsbno = ap->blkno;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}
	if (args.fsbno == NULLFSBLOCK && nullfb) {
		/* Last resort: anywhere in the filesystem, note low space. */
		args.fsbno = 0;
		args.type = XFS_ALLOCTYPE_FIRST_AG;
		args.total = ap->minlen;
		args.minleft = 0;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
		ap->dfops->dop_low = true;
	}
	if (args.fsbno != NULLFSBLOCK) {
		/*
		 * check the allocation happened at the same or higher AG than
		 * the first block that was allocated.
		 */
		ASSERT(*ap->firstblock == NULLFSBLOCK ||
		       XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
		       XFS_FSB_TO_AGNO(mp, args.fsbno) ||
		       (ap->dfops->dop_low &&
			XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
			XFS_FSB_TO_AGNO(mp, args.fsbno)));

		ap->blkno = args.fsbno;
		if (*ap->firstblock == NULLFSBLOCK)
			*ap->firstblock = args.fsbno;
		ASSERT(nullfb || fb_agno == args.agno ||
		       (ap->dfops->dop_low && fb_agno < args.agno));
		ap->length = args.len;
		ap->ip->i_d.di_nblocks += args.len;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= args.len;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
					XFS_TRANS_DQ_BCOUNT,
			(long) args.len);
	} else {
		/* Total failure: report no allocation back to the caller. */
		ap->blkno = NULLFSBLOCK;
		ap->length = 0;
	}
	return 0;
}
3878
3879 /*
3880  * For a remap operation, just "allocate" an extent at the address that the
3881  * caller passed in, and ensure that the AGFL is the right size.  The caller
3882  * will then map the "allocated" extent into the file somewhere.
3883  */
3884 STATIC int
3885 xfs_bmap_remap_alloc(
3886         struct xfs_bmalloca     *ap)
3887 {
3888         struct xfs_trans        *tp = ap->tp;
3889         struct xfs_mount        *mp = tp->t_mountp;
3890         xfs_agblock_t           bno;
3891         struct xfs_alloc_arg    args;
3892         int                     error;
3893
3894         /*
3895          * validate that the block number is legal - the enables us to detect
3896          * and handle a silent filesystem corruption rather than crashing.
3897          */
3898         memset(&args, 0, sizeof(struct xfs_alloc_arg));
3899         args.tp = ap->tp;
3900         args.mp = ap->tp->t_mountp;
3901         bno = *ap->firstblock;
3902         args.agno = XFS_FSB_TO_AGNO(mp, bno);
3903         args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
3904         if (args.agno >= mp->m_sb.sb_agcount ||
3905             args.agbno >= mp->m_sb.sb_agblocks)
3906                 return -EFSCORRUPTED;
3907
3908         /* "Allocate" the extent from the range we passed in. */
3909         trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
3910         ap->blkno = bno;
3911         ap->ip->i_d.di_nblocks += ap->length;
3912         xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
3913
3914         /* Fix the freelist, like a real allocator does. */
3915         args.datatype = ap->datatype;
3916         args.pag = xfs_perag_get(args.mp, args.agno);
3917         ASSERT(args.pag);
3918
3919         /*
3920          * The freelist fixing code will decline the allocation if
3921          * the size and shape of the free space doesn't allow for
3922          * allocating the extent and updating all the metadata that
3923          * happens during an allocation.  We're remapping, not
3924          * allocating, so skip that check by pretending to be freeing.
3925          */
3926         error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3927         if (error)
3928                 goto error0;
3929 error0:
3930         xfs_perag_put(args.pag);
3931         if (error)
3932                 trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
3933         return error;
3934 }
3935
3936 /*
3937  * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
3938  * It figures out where to ask the underlying allocator to put the new extent.
3939  */
3940 STATIC int
3941 xfs_bmap_alloc(
3942         struct xfs_bmalloca     *ap)    /* bmap alloc argument struct */
3943 {
3944         if (ap->flags & XFS_BMAPI_REMAP)
3945                 return xfs_bmap_remap_alloc(ap);
3946         if (XFS_IS_REALTIME_INODE(ap->ip) &&
3947             xfs_alloc_is_userdata(ap->datatype))
3948                 return xfs_bmap_rtalloc(ap);
3949         return xfs_bmap_btalloc(ap);
3950 }
3951
3952 /*
3953  * Trim the returned map to the required bounds
3954  */
3955 STATIC void
3956 xfs_bmapi_trim_map(
3957         struct xfs_bmbt_irec    *mval,
3958         struct xfs_bmbt_irec    *got,
3959         xfs_fileoff_t           *bno,
3960         xfs_filblks_t           len,
3961         xfs_fileoff_t           obno,
3962         xfs_fileoff_t           end,
3963         int                     n,
3964         int                     flags)
3965 {
3966         if ((flags & XFS_BMAPI_ENTIRE) ||
3967             got->br_startoff + got->br_blockcount <= obno) {
3968                 *mval = *got;
3969                 if (isnullstartblock(got->br_startblock))
3970                         mval->br_startblock = DELAYSTARTBLOCK;
3971                 return;
3972         }
3973
3974         if (obno > *bno)
3975                 *bno = obno;
3976         ASSERT((*bno >= obno) || (n == 0));
3977         ASSERT(*bno < end);
3978         mval->br_startoff = *bno;
3979         if (isnullstartblock(got->br_startblock))
3980                 mval->br_startblock = DELAYSTARTBLOCK;
3981         else
3982                 mval->br_startblock = got->br_startblock +
3983                                         (*bno - got->br_startoff);
3984         /*
3985          * Return the minimum of what we got and what we asked for for
3986          * the length.  We can use the len variable here because it is
3987          * modified below and we could have been there before coming
3988          * here if the first part of the allocation didn't overlap what
3989          * was asked for.
3990          */
3991         mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
3992                         got->br_blockcount - (*bno - got->br_startoff));
3993         mval->br_state = got->br_state;
3994         ASSERT(mval->br_blockcount <= len);
3995         return;
3996 }
3997
/*
 * Update and validate the extent map to return.
 *
 * Folds the just-built mapping at **map into the caller's result
 * array: it either extends/merges with the previous entry (same
 * start, contiguous real blocks, or adjacent delayed extents) or
 * advances *map and *n to start a new entry.  *bno and *len are
 * advanced past the recorded mapping.
 */
STATIC void
xfs_bmapi_update_map(
	struct xfs_bmbt_irec	**map,
	xfs_fileoff_t		*bno,
	xfs_filblks_t		*len,
	xfs_fileoff_t		obno,
	xfs_fileoff_t		end,
	int			*n,
	int			flags)
{
	xfs_bmbt_irec_t	*mval = *map;

	ASSERT((flags & XFS_BMAPI_ENTIRE) ||
	       ((mval->br_startoff + mval->br_blockcount) <= end));
	ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
	       (mval->br_startoff < obno));

	/* Advance the request range past the mapping just recorded. */
	*bno = mval->br_startoff + mval->br_blockcount;
	*len = end - *bno;
	if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
		/* update previous map with new information */
		ASSERT(mval->br_startblock == mval[-1].br_startblock);
		ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
		ASSERT(mval->br_state == mval[-1].br_state);
		mval[-1].br_blockcount = mval->br_blockcount;
		mval[-1].br_state = mval->br_state;
	} else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
		   mval[-1].br_startblock != DELAYSTARTBLOCK &&
		   mval[-1].br_startblock != HOLESTARTBLOCK &&
		   mval->br_startblock == mval[-1].br_startblock +
					  mval[-1].br_blockcount &&
		   ((flags & XFS_BMAPI_IGSTATE) ||
			mval[-1].br_state == mval->br_state)) {
		/* Contiguous with the previous real extent: extend it. */
		ASSERT(mval->br_startoff ==
		       mval[-1].br_startoff + mval[-1].br_blockcount);
		mval[-1].br_blockcount += mval->br_blockcount;
	} else if (*n > 0 &&
		   mval->br_startblock == DELAYSTARTBLOCK &&
		   mval[-1].br_startblock == DELAYSTARTBLOCK &&
		   mval->br_startoff ==
		   mval[-1].br_startoff + mval[-1].br_blockcount) {
		/* Two adjacent delayed-allocation extents: merge them. */
		mval[-1].br_blockcount += mval->br_blockcount;
		mval[-1].br_state = mval->br_state;
	} else if (!((*n == 0) &&
		     ((mval->br_startoff + mval->br_blockcount) <=
		      obno))) {
		/* Otherwise start a new entry in the result array. */
		mval++;
		(*n)++;
	}
	*map = mval;
}
4052
/*
 * Map file blocks to filesystem blocks without allocation.
 *
 * Fills mval[] with up to *nmap mappings covering the file range
 * [bno, bno + len) of the data fork (or attr fork with
 * XFS_BMAPI_ATTRFORK).  Holes are reported with HOLESTARTBLOCK and
 * delayed allocations with DELAYSTARTBLOCK as the start block.  On
 * return *nmap holds the number of mappings produced.  The caller
 * must hold the ilock (shared or exclusive).  Returns 0 or a
 * negative errno.
 */
int
xfs_bmapi_read(
	struct xfs_inode	*ip,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len,
	struct xfs_bmbt_irec	*mval,
	int			*nmap,
	int			flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got;
	struct xfs_bmbt_irec	prev;
	xfs_fileoff_t		obno;
	xfs_fileoff_t		end;
	xfs_extnum_t		lastx;
	int			error;
	int			eof;
	int			n = 0;
	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;

	ASSERT(*nmap >= 1);
	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
			   XFS_BMAPI_IGSTATE)));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));

	/* Reject inodes whose fork format can't hold extents. */
	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	XFS_STATS_INC(mp, xs_blk_mapr);

	ifp = XFS_IFORK_PTR(ip, whichfork);

	/* Pull the in-core extent list in if it isn't loaded yet. */
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, whichfork);
		if (error)
			return error;
	}

	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
	end = bno + len;
	obno = bno;

	while (bno < end && n < *nmap) {
		/* Reading past eof, act as though there's a hole up to end. */
		if (eof)
			got.br_startoff = end;
		if (got.br_startoff > bno) {
			/* Reading in a hole.  */
			mval->br_startoff = bno;
			mval->br_startblock = HOLESTARTBLOCK;
			mval->br_blockcount =
				XFS_FILBLKS_MIN(len, got.br_startoff - bno);
			mval->br_state = XFS_EXT_NORM;
			bno += mval->br_blockcount;
			len -= mval->br_blockcount;
			mval++;
			n++;
			continue;
		}

		/* set up the extent map to return. */
		xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/* If we're done, stop now. */
		if (bno >= end || n >= *nmap)
			break;

		/* Else go on to the next record. */
		if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
		else
			eof = 1;
	}
	*nmap = n;
	return 0;
}
4143
/*
 * Reserve space for a delayed allocation of @len blocks starting at file
 * offset @aoff in the data fork of @ip, and insert the resulting delalloc
 * record into the in-core extent list.
 *
 * Takes a transaction-less quota reservation and accounts the data blocks
 * and worst-case indirect (bmbt) blocks against the filesystem free-space
 * counters; all of that is undone on failure.  On success *got describes
 * the (possibly merged) delalloc extent.  Returns 0 or a negative errno.
 */
int
xfs_bmapi_reserve_delalloc(
	struct xfs_inode	*ip,
	xfs_fileoff_t		aoff,
	xfs_filblks_t		len,
	struct xfs_bmbt_irec	*got,
	struct xfs_bmbt_irec	*prev,
	xfs_extnum_t		*lastx,
	int			eof)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
	xfs_extlen_t		alen;
	xfs_extlen_t		indlen;
	char			rt = XFS_IS_REALTIME_INODE(ip);
	xfs_extlen_t		extsz;
	int			error;

	/* Clamp to the maximum extent size and to the hole we're filling. */
	alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
	if (!eof)
		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);

	/* Figure out the extent size, adjust alen */
	extsz = xfs_get_extsz_hint(ip);
	if (extsz) {
		/*
		 * With delay == 1 the alignment cannot fail, hence the bare
		 * ASSERT rather than real error handling.
		 */
		error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
					       1, 0, &aoff, &alen);
		ASSERT(!error);
	}

	/* For realtime, reuse extsz as the reservation in realtime extents. */
	if (rt)
		extsz = alen / mp->m_sb.sb_rextsize;

	/*
	 * Make a transaction-less quota reservation for delayed allocation
	 * blocks.  This number gets adjusted later.  We return if we haven't
	 * allocated blocks already inside this loop.
	 */
	error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
			rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	if (error)
		return error;

	/*
	 * Split changing sb for alen and indlen since they could be coming
	 * from different places.
	 */
	indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
	ASSERT(indlen > 0);

	/* Data blocks come from the rt or data free-space counter... */
	if (rt) {
		error = xfs_mod_frextents(mp, -((int64_t)extsz));
	} else {
		error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
	}

	if (error)
		goto out_unreserve_quota;

	/* ...while indirect blocks always come from the data counter. */
	error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
	if (error)
		goto out_unreserve_blocks;


	ip->i_delayed_blks += alen;

	/* Insert the new delalloc extent; startblock encodes indlen. */
	got->br_startoff = aoff;
	got->br_startblock = nullstartblock(indlen);
	got->br_blockcount = alen;
	got->br_state = XFS_EXT_NORM;
	xfs_bmap_add_extent_hole_delay(ip, lastx, got);

	/*
	 * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
	 * might have merged it into one of the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);

	ASSERT(got->br_startoff <= aoff);
	ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
	ASSERT(isnullstartblock(got->br_startblock));
	ASSERT(got->br_state == XFS_EXT_NORM);
	return 0;

	/* Unwind in reverse order of the reservations taken above. */
out_unreserve_blocks:
	if (rt)
		xfs_mod_frextents(mp, extsz);
	else
		xfs_mod_fdblocks(mp, alen, false);
out_unreserve_quota:
	if (XFS_IS_QUOTA_ON(mp))
		xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
				XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	return error;
}
4239
4240 static int
4241 xfs_bmapi_allocate(
4242         struct xfs_bmalloca     *bma)
4243 {
4244         struct xfs_mount        *mp = bma->ip->i_mount;
4245         int                     whichfork = (bma->flags & XFS_BMAPI_ATTRFORK) ?
4246                                                 XFS_ATTR_FORK : XFS_DATA_FORK;
4247         struct xfs_ifork        *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
4248         int                     tmp_logflags = 0;
4249         int                     error;
4250
4251         ASSERT(bma->length > 0);
4252
4253         /*
4254          * For the wasdelay case, we could also just allocate the stuff asked
4255          * for in this bmap call but that wouldn't be as good.
4256          */
4257         if (bma->wasdel) {
4258                 bma->length = (xfs_extlen_t)bma->got.br_blockcount;
4259                 bma->offset = bma->got.br_startoff;
4260                 if (bma->idx != NULLEXTNUM && bma->idx) {
4261                         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
4262                                          &bma->prev);
4263                 }
4264         } else {
4265                 bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
4266                 if (!bma->eof)
4267                         bma->length = XFS_FILBLKS_MIN(bma->length,
4268                                         bma->got.br_startoff - bma->offset);
4269         }
4270
4271         /*
4272          * Set the data type being allocated. For the data fork, the first data
4273          * in the file is treated differently to all other allocations. For the
4274          * attribute fork, we only need to ensure the allocated range is not on
4275          * the busy list.
4276          */
4277         if (!(bma->flags & XFS_BMAPI_METADATA)) {
4278                 bma->datatype = XFS_ALLOC_NOBUSY;
4279                 if (whichfork == XFS_DATA_FORK) {
4280                         if (bma->offset == 0)
4281                                 bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
4282                         else
4283                                 bma->datatype |= XFS_ALLOC_USERDATA;
4284                 }
4285                 if (bma->flags & XFS_BMAPI_ZERO)
4286                         bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
4287         }
4288
4289         bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
4290
4291         /*
4292          * Only want to do the alignment at the eof if it is userdata and
4293          * allocation length is larger than a stripe unit.
4294          */
4295         if (mp->m_dalign && bma->length >= mp->m_dalign &&
4296             !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
4297                 error = xfs_bmap_isaeof(bma, whichfork);
4298                 if (error)
4299                         return error;
4300         }
4301
4302         error = xfs_bmap_alloc(bma);
4303         if (error)
4304                 return error;
4305
4306         if (bma->dfops->dop_low)
4307                 bma->minleft = 0;
4308         if (bma->cur)
4309                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4310         if (bma->blkno == NULLFSBLOCK)
4311                 return 0;
4312         if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
4313                 bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
4314                 bma->cur->bc_private.b.firstblock = *bma->firstblock;
4315                 bma->cur->bc_private.b.dfops = bma->dfops;
4316         }
4317         /*
4318          * Bump the number of extents we've allocated
4319          * in this call.
4320          */
4321         bma->nallocs++;
4322
4323         if (bma->cur)
4324                 bma->cur->bc_private.b.flags =
4325                         bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
4326
4327         bma->got.br_startoff = bma->offset;
4328         bma->got.br_startblock = bma->blkno;
4329         bma->got.br_blockcount = bma->length;
4330         bma->got.br_state = XFS_EXT_NORM;
4331
4332         /*
4333          * A wasdelay extent has been initialized, so shouldn't be flagged
4334          * as unwritten.
4335          */
4336         if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) &&
4337             xfs_sb_version_hasextflgbit(&mp->m_sb))
4338                 bma->got.br_state = XFS_EXT_UNWRITTEN;
4339
4340         if (bma->wasdel)
4341                 error = xfs_bmap_add_extent_delay_real(bma);
4342         else
4343                 error = xfs_bmap_add_extent_hole_real(bma, whichfork);
4344
4345         bma->logflags |= tmp_logflags;
4346         if (error)
4347                 return error;
4348
4349         /*
4350          * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
4351          * or xfs_bmap_add_extent_hole_real might have merged it into one of
4352          * the neighbouring ones.
4353          */
4354         xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
4355
4356         ASSERT(bma->got.br_startoff <= bma->offset);
4357         ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
4358                bma->offset + bma->length);
4359         ASSERT(bma->got.br_state == XFS_EXT_NORM ||
4360                bma->got.br_state == XFS_EXT_UNWRITTEN);
4361         return 0;
4362 }
4363
/*
 * Convert the state of the extent described by @mval (written <-> unwritten)
 * when the @flags combination asks for it, updating the in-core extent list
 * and btree via xfs_bmap_add_extent_unwritten_real().
 *
 * Returns 0 when no conversion is needed or the whole of @mval was
 * converted, -EAGAIN when only part of the requested @len was covered and
 * the caller should retry the remainder, or a negative errno on failure.
 */
STATIC int
xfs_bmapi_convert_unwritten(
	struct xfs_bmalloca	*bma,
	struct xfs_bmbt_irec	*mval,
	xfs_filblks_t		len,
	int			flags)
{
	int			whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
						XFS_ATTR_FORK : XFS_DATA_FORK;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(bma->ip, whichfork);
	int			tmp_logflags = 0;
	int			error;

	/*
	 * Extent is already unwritten and the caller asked for unwritten
	 * allocation (PREALLOC): nothing to convert.
	 */
	if (mval->br_state == XFS_EXT_UNWRITTEN &&
	    (flags & XFS_BMAPI_PREALLOC))
		return 0;

	/*
	 * Extent is written but real->unwritten conversion needs both
	 * PREALLOC and CONVERT set; otherwise nothing to do.
	 */
	if (mval->br_state == XFS_EXT_NORM &&
	    (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
			(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
		return 0;

	/*
	 * Modify (by adding) the state flag, if writing.
	 */
	ASSERT(mval->br_blockcount <= len);
	if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
		/* Fork is in btree format but we have no cursor yet: make one. */
		bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
					bma->ip, whichfork);
		bma->cur->bc_private.b.firstblock = *bma->firstblock;
		bma->cur->bc_private.b.dfops = bma->dfops;
	}
	/* Flip the extent state to the opposite of what it is now. */
	mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
				? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;

	/*
	 * Before insertion into the bmbt, zero the range being converted
	 * if required.
	 */
	if (flags & XFS_BMAPI_ZERO) {
		error = xfs_zero_extent(bma->ip, mval->br_startblock,
					mval->br_blockcount);
		if (error)
			return error;
	}

	error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
			&bma->cur, mval, bma->firstblock, bma->dfops,
			&tmp_logflags);
	/*
	 * Log the inode core unconditionally in the unwritten extent conversion
	 * path because the conversion might not have done so (e.g., if the
	 * extent count hasn't changed). We need to make sure the inode is dirty
	 * in the transaction for the sake of fsync(), even if nothing has
	 * changed, because fsync() will not force the log for this transaction
	 * unless it sees the inode pinned.
	 */
	bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
	if (error)
		return error;

	/*
	 * Update our extent pointer, given that
	 * xfs_bmap_add_extent_unwritten_real might have merged it into one
	 * of the neighbouring ones.
	 */
	xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);

	/*
	 * We may have combined previously unwritten space with written space,
	 * so generate another request.
	 */
	if (mval->br_blockcount < len)
		return -EAGAIN;
	return 0;
}
4442
4443 /*
4444  * Map file blocks to filesystem blocks, and allocate blocks or convert the
4445  * extent state if necessary.  Details behaviour is controlled by the flags
4446  * parameter.  Only allocates blocks from a single allocation group, to avoid
4447  * locking problems.
4448  *
4449  * The returned value in "firstblock" from the first call in a transaction
4450  * must be remembered and presented to subsequent calls in "firstblock".
4451  * An upper bound for the number of blocks to be allocated is supplied to
4452  * the first call in "total"; if no allocation group has that many free
4453  * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
4454  */
int
xfs_bmapi_write(
	struct xfs_trans	*tp,		/* transaction pointer */
	struct xfs_inode	*ip,		/* incore inode */
	xfs_fileoff_t		bno,		/* starting file offs. mapped */
	xfs_filblks_t		len,		/* length to map in file */
	int			flags,		/* XFS_BMAPI_... */
	xfs_fsblock_t		*firstblock,	/* first allocated block
						   controls a.g. for allocs */
	xfs_extlen_t		total,		/* total blocks needed */
	struct xfs_bmbt_irec	*mval,		/* output: map values */
	int			*nmap,		/* i/o: mval size/count */
	struct xfs_defer_ops	*dfops)		/* i/o: list extents to free */
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp;
	struct xfs_bmalloca	bma = { NULL };	/* args for xfs_bmap_alloc */
	xfs_fileoff_t		end;		/* end of mapped file region */
	int			eof;		/* after the end of extents */
	int			error;		/* error return */
	int			n;		/* current extent index */
	xfs_fileoff_t		obno;		/* old block number (offset) */
	int			whichfork;	/* data or attr fork */
	char			inhole;		/* current location is hole in file */
	char			wasdelay;	/* old extent was delayed */

#ifdef DEBUG
	xfs_fileoff_t		orig_bno;	/* original block number value */
	int			orig_flags;	/* original flags arg value */
	xfs_filblks_t		orig_len;	/* original value of len arg */
	struct xfs_bmbt_irec	*orig_mval;	/* original value of mval */
	int			orig_nmap;	/* original value of *nmap */

	orig_bno = bno;
	orig_len = len;
	orig_flags = flags;
	orig_mval = mval;
	orig_nmap = *nmap;
#endif
	whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
		XFS_ATTR_FORK : XFS_DATA_FORK;

	/* Validate caller arguments and flag combinations up front. */
	ASSERT(*nmap >= 1);
	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
	ASSERT(!(flags & XFS_BMAPI_IGSTATE));
	ASSERT(tp != NULL);
	ASSERT(len > 0);
	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
	ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
	ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));

	/* zeroing is currently only for data extents, not metadata */
	ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
			(XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
	/*
	 * we can allocate unwritten extents or pre-zero allocated blocks,
	 * but it makes no sense to do both at once. This would result in
	 * zeroing the unwritten extent twice, but it still being an
	 * unwritten extent....
	 */
	ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
			(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));

	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);

	XFS_STATS_INC(mp, xs_blk_mapw);

	/*
	 * First allocation in this transaction: leave room for a potential
	 * btree split (one block per level, plus one for extents format).
	 */
	if (*firstblock == NULLFSBLOCK) {
		if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
			bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
		else
			bma.minleft = 1;
	} else {
		bma.minleft = 0;
	}

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			goto error0;
	}

	xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
				&bma.prev);
	n = 0;
	end = bno + len;
	obno = bno;

	bma.tp = tp;
	bma.ip = ip;
	bma.total = total;
	bma.datatype = 0;
	bma.dfops = dfops;
	bma.firstblock = firstblock;

	/* Walk the range, allocating holes/delalloc and mapping extents. */
	while (bno < end && n < *nmap) {
		inhole = eof || bma.got.br_startoff > bno;
		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);

		/*
		 * Make sure we only reflink into a hole.
		 */
		if (flags & XFS_BMAPI_REMAP)
			ASSERT(inhole);

		/*
		 * First, deal with the hole before the allocated space
		 * that we found, if any.
		 */
		if (inhole || wasdelay) {
			bma.eof = eof;
			bma.conv = !!(flags & XFS_BMAPI_CONVERT);
			bma.wasdel = wasdelay;
			bma.offset = bno;
			bma.flags = flags;

			/*
			 * There's a 32/64 bit type mismatch between the
			 * allocation length request (which can be 64 bits in
			 * length) and the bma length request, which is
			 * xfs_extlen_t and therefore 32 bits. Hence we have to
			 * check for 32-bit overflows and handle them here.
			 */
			if (len > (xfs_filblks_t)MAXEXTLEN)
				bma.length = MAXEXTLEN;
			else
				bma.length = len;

			ASSERT(len > 0);
			ASSERT(bma.length > 0);
			error = xfs_bmapi_allocate(&bma);
			if (error)
				goto error0;
			/* Allocator found no space: return short mapping. */
			if (bma.blkno == NULLFSBLOCK)
				break;
		}

		/* Deal with the allocated space we found.  */
		xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
							end, n, flags);

		/* Execute unwritten extent conversion if necessary */
		error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
		/* -EAGAIN: only part converted, retry the remainder. */
		if (error == -EAGAIN)
			continue;
		if (error)
			goto error0;

		/* update the extent map to return */
		xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);

		/*
		 * If we're done, stop now.  Stop when we've allocated
		 * XFS_BMAP_MAX_NMAP extents no matter what.  Otherwise
		 * the transaction may get too big.
		 */
		if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
			break;

		/* Else go on to the next record. */
		bma.prev = bma.got;
		if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
					 &bma.got);
		} else
			eof = 1;
	}
	*nmap = n;

	/*
	 * Transform from btree to extents, give it cur.
	 */
	if (xfs_bmap_wants_extents(ip, whichfork)) {
		int		tmp_logflags = 0;

		ASSERT(bma.cur);
		error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
			&tmp_logflags, whichfork);
		bma.logflags |= tmp_logflags;
		if (error)
			goto error0;
	}

	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE ||
	       XFS_IFORK_NEXTENTS(ip, whichfork) >
		XFS_IFORK_MAXEXT(ip, whichfork));
	error = 0;
error0:
	/*
	 * Log everything.  Do this after conversion, there's no point in
	 * logging the extent records if we've converted to btree format.
	 */
	if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
	    XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
		bma.logflags &= ~xfs_ilog_fext(whichfork);
	else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
		 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
		bma.logflags &= ~xfs_ilog_fbroot(whichfork);
	/*
	 * Log whatever the flags say, even if error.  Otherwise we might miss
	 * detecting a case where the data is changed, there's an error,
	 * and it's not logged so we don't shutdown when we should.
	 */
	if (bma.logflags)
		xfs_trans_log_inode(tp, ip, bma.logflags);

	if (bma.cur) {
		if (!error) {
			/* firstblock must never move backwards across AGs. */
			ASSERT(*firstblock == NULLFSBLOCK ||
			       XFS_FSB_TO_AGNO(mp, *firstblock) ==
			       XFS_FSB_TO_AGNO(mp,
				       bma.cur->bc_private.b.firstblock) ||
			       (dfops->dop_low &&
				XFS_FSB_TO_AGNO(mp, *firstblock) <
				XFS_FSB_TO_AGNO(mp,
					bma.cur->bc_private.b.firstblock)));
			*firstblock = bma.cur->bc_private.b.firstblock;
		}
		xfs_btree_del_cursor(bma.cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}
	if (!error)
		xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
			orig_nmap, *nmap);
	return error;
}
4694
4695 /*
4696  * When a delalloc extent is split (e.g., due to a hole punch), the original
4697  * indlen reservation must be shared across the two new extents that are left
4698  * behind.
4699  *
4700  * Given the original reservation and the worst case indlen for the two new
4701  * extents (as calculated by xfs_bmap_worst_indlen()), split the original
4702  * reservation fairly across the two new extents. If necessary, steal available
4703  * blocks from a deleted extent to make up a reservation deficiency (e.g., if
4704  * ores == 1). The number of stolen blocks is returned. The availability and
4705  * subsequent accounting of stolen blocks is the responsibility of the caller.
4706  */
4707 static xfs_filblks_t
4708 xfs_bmap_split_indlen(
4709         xfs_filblks_t                   ores,           /* original res. */
4710         xfs_filblks_t                   *indlen1,       /* ext1 worst indlen */
4711         xfs_filblks_t                   *indlen2,       /* ext2 worst indlen */
4712         xfs_filblks_t                   avail)          /* stealable blocks */
4713 {
4714         xfs_filblks_t                   len1 = *indlen1;
4715         xfs_filblks_t                   len2 = *indlen2;
4716         xfs_filblks_t                   nres = len1 + len2; /* new total res. */
4717         xfs_filblks_t                   stolen = 0;
4718
4719         /*
4720          * Steal as many blocks as we can to try and satisfy the worst case
4721          * indlen for both new extents.
4722          */
4723         while (nres > ores && avail) {
4724                 nres--;
4725                 avail--;
4726                 stolen++;
4727         }
4728
4729         /*
4730          * The only blocks available are those reserved for the original
4731          * extent and what we can steal from the extent being removed.
4732          * If this still isn't enough to satisfy the combined
4733          * requirements for the two new extents, skim blocks off of each
4734          * of the new reservations until they match what is available.
4735          */
4736         while (nres > ores) {
4737                 if (len1) {
4738                         len1--;
4739                         nres--;
4740                 }
4741                 if (nres == ores)
4742                         break;
4743                 if (len2) {
4744                         len2--;
4745                         nres--;
4746                 }
4747         }
4748
4749         *indlen1 = len1;
4750         *indlen2 = len2;
4751
4752         return stolen;
4753 }
4754
4755 /*
4756  * Called by xfs_bmapi to update file extent records and the btree
4757  * after removing space (or undoing a delayed allocation).
4758  */
4759 STATIC int                              /* error */
4760 xfs_bmap_del_extent(
4761         xfs_inode_t             *ip,    /* incore inode pointer */
4762         xfs_trans_t             *tp,    /* current transaction pointer */
4763         xfs_extnum_t            *idx,   /* extent number to update/delete */
4764         struct xfs_defer_ops    *dfops, /* list of extents to be freed */
4765         xfs_btree_cur_t         *cur,   /* if null, not a btree */
4766         xfs_bmbt_irec_t         *del,   /* data to remove from extents */
4767         int                     *logflagsp, /* inode logging flags */
4768         int                     whichfork, /* data or attr fork */
4769         int                     bflags) /* bmapi flags */
4770 {
4771         xfs_filblks_t           da_new; /* new delay-alloc indirect blocks */
4772         xfs_filblks_t           da_old; /* old delay-alloc indirect blocks */
4773         xfs_fsblock_t           del_endblock=0; /* first block past del */
4774         xfs_fileoff_t           del_endoff;     /* first offset past del */
4775         int                     delay;  /* current block is delayed allocated */
4776         int                     do_fx;  /* free extent at end of routine */
4777         xfs_bmbt_rec_host_t     *ep;    /* current extent entry pointer */
4778         int                     error;  /* error return value */
4779         int                     flags;  /* inode logging flags */
4780         xfs_bmbt_irec_t         got;    /* current extent entry */
4781         xfs_fileoff_t           got_endoff;     /* first offset past got */
4782         int                     i;      /* temp state */
4783         xfs_ifork_t             *ifp;   /* inode fork pointer */
4784         xfs_mount_t             *mp;    /* mount structure */
4785         xfs_filblks_t           nblks;  /* quota/sb block count */
4786         xfs_bmbt_irec_t         new;    /* new record to be inserted */
4787         /* REFERENCED */
4788         uint                    qfield; /* quota field to update */
4789         xfs_filblks_t           temp;   /* for indirect length calculations */
4790         xfs_filblks_t           temp2;  /* for indirect length calculations */
4791         int                     state = 0;
4792
4793         mp = ip->i_mount;
4794         XFS_STATS_INC(mp, xs_del_exlist);
4795
4796         if (whichfork == XFS_ATTR_FORK)
4797                 state |= BMAP_ATTRFORK;
4798
4799         ifp = XFS_IFORK_PTR(ip, whichfork);
4800         ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
4801                 (uint)sizeof(xfs_bmbt_rec_t)));
4802         ASSERT(del->br_blockcount > 0);
4803         ep = xfs_iext_get_ext(ifp, *idx);
4804         xfs_bmbt_get_all(ep, &got);
4805         ASSERT(got.br_startoff <= del->br_startoff);
4806         del_endoff = del->br_startoff + del->br_blockcount;
4807         got_endoff = got.br_startoff + got.br_blockcount;
4808         ASSERT(got_endoff >= del_endoff);
4809         delay = isnullstartblock(got.br_startblock);
4810         ASSERT(isnullstartblock(del->br_startblock) == delay);
4811         flags = 0;
4812         qfield = 0;
4813         error = 0;
4814         /*
4815          * If deleting a real allocation, must free up the disk space.
4816          */
4817         if (!delay) {
4818                 flags = XFS_ILOG_CORE;
4819                 /*
4820                  * Realtime allocation.  Free it and record di_nblocks update.
4821                  */
4822                 if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
4823                         xfs_fsblock_t   bno;
4824                         xfs_filblks_t   len;
4825
4826                         ASSERT(do_mod(del->br_blockcount,
4827                                       mp->m_sb.sb_rextsize) == 0);
4828                         ASSERT(do_mod(del->br_startblock,
4829                                       mp->m_sb.sb_rextsize) == 0);
4830                         bno = del->br_startblock;
4831                         len = del->br_blockcount;
4832                         do_div(bno, mp->m_sb.sb_rextsize);
4833                         do_div(len, mp->m_sb.sb_rextsize);
4834                         error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
4835                         if (error)
4836                                 goto done;
4837                         do_fx = 0;
4838                         nblks = len * mp->m_sb.sb_rextsize;
4839                         qfield = XFS_TRANS_DQ_RTBCOUNT;
4840                 }
4841                 /*
4842                  * Ordinary allocation.
4843                  */
4844                 else {
4845                         do_fx = 1;
4846                         nblks = del->br_blockcount;
4847                         qfield = XFS_TRANS_DQ_BCOUNT;
4848                 }
4849                 /*
4850                  * Set up del_endblock and cur for later.
4851                  */
4852                 del_endblock = del->br_startblock + del->br_blockcount;
4853                 if (cur) {
4854                         if ((error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
4855                                         got.br_startblock, got.br_blockcount,
4856                                         &i)))
4857                                 goto done;
4858                         XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4859                 }
4860                 da_old = da_new = 0;
4861         } else {
4862                 da_old = startblockval(got.br_startblock);
4863                 da_new = 0;
4864                 nblks = 0;
4865                 do_fx = 0;
4866         }
4867
4868         /*
4869          * Set flag value to use in switch statement.
4870          * Left-contig is 2, right-contig is 1.
4871          */
4872         switch (((got.br_startoff == del->br_startoff) << 1) |
4873                 (got_endoff == del_endoff)) {
4874         case 3:
4875                 /*
4876                  * Matches the whole extent.  Delete the entry.
4877                  */
4878                 xfs_iext_remove(ip, *idx, 1,
4879                                 whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
4880                 --*idx;
4881                 if (delay)
4882                         break;
4883
4884                 XFS_IFORK_NEXT_SET(ip, whichfork,
4885                         XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
4886                 flags |= XFS_ILOG_CORE;
4887                 if (!cur) {
4888                         flags |= xfs_ilog_fext(whichfork);
4889                         break;
4890                 }
4891                 if ((error = xfs_btree_delete(cur, &i)))
4892                         goto done;
4893                 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
4894                 break;
4895
4896         case 2:
4897                 /*
4898                  * Deleting the first part of the extent.
4899                  */
4900                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4901                 xfs_bmbt_set_startoff(ep, del_endoff);
4902                 temp = got.br_blockcount - del->br_blockcount;
4903                 xfs_bmbt_set_blockcount(ep, temp);
4904                 if (delay) {
4905                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
4906                                 da_old);
4907                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4908                         trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4909                         da_new = temp;
4910                         break;
4911                 }
4912                 xfs_bmbt_set_startblock(ep, del_endblock);
4913                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4914                 if (!cur) {
4915                         flags |= xfs_ilog_fext(whichfork);
4916                         break;
4917                 }
4918                 if ((error = xfs_bmbt_update(cur, del_endoff, del_endblock,
4919                                 got.br_blockcount - del->br_blockcount,
4920                                 got.br_state)))
4921                         goto done;
4922                 break;
4923
4924         case 1:
4925                 /*
4926                  * Deleting the last part of the extent.
4927                  */
4928                 temp = got.br_blockcount - del->br_blockcount;
4929                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4930                 xfs_bmbt_set_blockcount(ep, temp);
4931                 if (delay) {
4932                         temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
4933                                 da_old);
4934                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
4935                         trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4936                         da_new = temp;
4937                         break;
4938                 }
4939                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
4940                 if (!cur) {
4941                         flags |= xfs_ilog_fext(whichfork);
4942                         break;
4943                 }
4944                 if ((error = xfs_bmbt_update(cur, got.br_startoff,
4945                                 got.br_startblock,
4946                                 got.br_blockcount - del->br_blockcount,
4947                                 got.br_state)))
4948                         goto done;
4949                 break;
4950
4951         case 0:
4952                 /*
4953                  * Deleting the middle of the extent.
4954                  */
4955                 temp = del->br_startoff - got.br_startoff;
4956                 trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
4957                 xfs_bmbt_set_blockcount(ep, temp);
4958                 new.br_startoff = del_endoff;
4959                 temp2 = got_endoff - del_endoff;
4960                 new.br_blockcount = temp2;
4961                 new.br_state = got.br_state;
4962                 if (!delay) {
4963                         new.br_startblock = del_endblock;
4964                         flags |= XFS_ILOG_CORE;
4965                         if (cur) {
4966                                 if ((error = xfs_bmbt_update(cur,
4967                                                 got.br_startoff,
4968                                                 got.br_startblock, temp,
4969                                                 got.br_state)))
4970                                         goto done;
4971                                 if ((error = xfs_btree_increment(cur, 0, &i)))
4972                                         goto done;
4973                                 cur->bc_rec.b = new;
4974                                 error = xfs_btree_insert(cur, &i);
4975                                 if (error && error != -ENOSPC)
4976                                         goto done;
4977                                 /*
4978                                  * If get no-space back from btree insert,
4979                                  * it tried a split, and we have a zero
4980                                  * block reservation.
4981                                  * Fix up our state and return the error.
4982                                  */
4983                                 if (error == -ENOSPC) {
4984                                         /*
4985                                          * Reset the cursor, don't trust
4986                                          * it after any insert operation.
4987                                          */
4988                                         if ((error = xfs_bmbt_lookup_eq(cur,
4989                                                         got.br_startoff,
4990                                                         got.br_startblock,
4991                                                         temp, &i)))
4992                                                 goto done;
4993                                         XFS_WANT_CORRUPTED_GOTO(mp,
4994                                                                 i == 1, done);
4995                                         /*
4996                                          * Update the btree record back
4997                                          * to the original value.
4998                                          */
4999                                         if ((error = xfs_bmbt_update(cur,
5000                                                         got.br_startoff,
5001                                                         got.br_startblock,
5002                                                         got.br_blockcount,
5003                                                         got.br_state)))
5004                                                 goto done;
5005                                         /*
5006                                          * Reset the extent record back
5007                                          * to the original value.
5008                                          */
5009                                         xfs_bmbt_set_blockcount(ep,
5010                                                 got.br_blockcount);
5011                                         flags = 0;
5012                                         error = -ENOSPC;
5013                                         goto done;
5014                                 }
5015                                 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
5016                         } else
5017                                 flags |= xfs_ilog_fext(whichfork);
5018                         XFS_IFORK_NEXT_SET(ip, whichfork,
5019                                 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
5020                 } else {
5021                         xfs_filblks_t   stolen;
5022                         ASSERT(whichfork == XFS_DATA_FORK);
5023
5024                         /*
5025                          * Distribute the original indlen reservation across the
5026                          * two new extents. Steal blocks from the deleted extent
5027                          * if necessary. Stealing blocks simply fudges the
5028                          * fdblocks accounting in xfs_bunmapi().
5029                          */
5030                         temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
5031                         temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
5032                         stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
5033                                                        del->br_blockcount);
5034                         da_new = temp + temp2 - stolen;
5035                         del->br_blockcount -= stolen;
5036
5037                         /*
5038                          * Set the reservation for each extent. Warn if either
5039                          * is zero as this can lead to delalloc problems.
5040                          */
5041                         WARN_ON_ONCE(!temp || !temp2);
5042                         xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
5043                         new.br_startblock = nullstartblock((int)temp2);
5044                 }
5045                 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
5046                 xfs_iext_insert(ip, *idx + 1, 1, &new, state);
5047                 ++*idx;
5048                 break;
5049         }
5050
5051         /* remove reverse mapping */
5052         if (!delay) {
5053                 error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, del);
5054                 if (error)
5055                         goto done;
5056         }
5057
5058         /*
5059          * If we need to, add to list of extents to delete.
5060          */
5061         if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
5062                 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
5063                         error = xfs_refcount_decrease_extent(mp, dfops, del);
5064                         if (error)
5065                                 goto done;
5066                 } else
5067                         xfs_bmap_add_free(mp, dfops, del->br_startblock,
5068                                         del->br_blockcount, NULL);
5069         }
5070
5071         /*
5072          * Adjust inode # blocks in the file.
5073          */
5074         if (nblks)
5075                 ip->i_d.di_nblocks -= nblks;
5076         /*
5077          * Adjust quota data.
5078          */
5079         if (qfield && !(bflags & XFS_BMAPI_REMAP))
5080                 xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
5081
5082         /*
5083          * Account for change in delayed indirect blocks.
5084          * Nothing to do for disk quota accounting here.
5085          */
5086         ASSERT(da_old >= da_new);
5087         if (da_old > da_new)
5088                 xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5089 done:
5090         *logflagsp = flags;
5091         return error;
5092 }
5093
5094 /*
5095  * Unmap (remove) blocks from a file.
5096  * If nexts is nonzero then the number of extents to remove is limited to
5097  * that value.  If not all extents in the block range can be removed then
5098  * *rlen is set to the amount of work left to do, otherwise it is set to
5099  * zero.
5100  */
5100 int                                             /* error */
5101 __xfs_bunmapi(
5102         xfs_trans_t             *tp,            /* transaction pointer */
5103         struct xfs_inode        *ip,            /* incore inode */
5104         xfs_fileoff_t           bno,            /* starting offset to unmap */
5105         xfs_filblks_t           *rlen,          /* i/o: amount remaining */
5106         int                     flags,          /* misc flags */
5107         xfs_extnum_t            nexts,          /* number of extents max */
5108         xfs_fsblock_t           *firstblock,    /* first allocated block
5109                                                    controls a.g. for allocs */
5110         struct xfs_defer_ops    *dfops)         /* i/o: deferred updates */
5111 {
5112         xfs_btree_cur_t         *cur;           /* bmap btree cursor */
5113         xfs_bmbt_irec_t         del;            /* extent being deleted */
5114         int                     eof;            /* is deleting at eof */
5115         xfs_bmbt_rec_host_t     *ep;            /* extent record pointer */
5116         int                     error;          /* error return value */
5117         xfs_extnum_t            extno;          /* extent number in list */
5118         xfs_bmbt_irec_t         got;            /* current extent record */
5119         xfs_ifork_t             *ifp;           /* inode fork pointer */
5120         int                     isrt;           /* freeing in rt area */
5121         xfs_extnum_t            lastx;          /* last extent index used */
5122         int                     logflags;       /* transaction logging flags */
5123         xfs_extlen_t            mod;            /* rt extent offset */
5124         xfs_mount_t             *mp;            /* mount structure */
5125         xfs_extnum_t            nextents;       /* number of file extents */
5126         xfs_bmbt_irec_t         prev;           /* previous extent record */
5127         xfs_fileoff_t           start;          /* first file offset deleted */
5128         int                     tmp_logflags;   /* partial logging flags */
5129         int                     wasdel;         /* was a delayed alloc extent */
5130         int                     whichfork;      /* data or attribute fork */
5131         xfs_fsblock_t           sum;
5132         xfs_filblks_t           len = *rlen;    /* length to unmap in file */
5133
5134         trace_xfs_bunmap(ip, bno, len, flags, _RET_IP_);
5135
5136         whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
5137                 XFS_ATTR_FORK : XFS_DATA_FORK;
5138         ifp = XFS_IFORK_PTR(ip, whichfork);
5139         if (unlikely(
5140             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5141             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
5142                 XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
5143                                  ip->i_mount);
5144                 return -EFSCORRUPTED;
5145         }
5146         mp = ip->i_mount;
5147         if (XFS_FORCED_SHUTDOWN(mp))
5148                 return -EIO;
5149
5150         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
5151         ASSERT(len > 0);
5152         ASSERT(nexts >= 0);
5153
5154         if (!(ifp->if_flags & XFS_IFEXTENTS) &&
5155             (error = xfs_iread_extents(tp, ip, whichfork)))
5156                 return error;
5157         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
5158         if (nextents == 0) {
5159                 *rlen = 0;
5160                 return 0;
5161         }
5162         XFS_STATS_INC(mp, xs_blk_unmap);
5163         isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5164         start = bno;
5165         bno = start + len - 1;
5166         ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
5167                 &prev);
5168
5169         /*
5170          * Check to see if the given block number is past the end of the
5171          * file, back up to the last block if so...
5172          */
5173         if (eof) {
5174                 ep = xfs_iext_get_ext(ifp, --lastx);
5175                 xfs_bmbt_get_all(ep, &got);
5176                 bno = got.br_startoff + got.br_blockcount - 1;
5177         }
5178         logflags = 0;
5179         if (ifp->if_flags & XFS_IFBROOT) {
5180                 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE);
5181                 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5182                 cur->bc_private.b.firstblock = *firstblock;
5183                 cur->bc_private.b.dfops = dfops;
5184                 cur->bc_private.b.flags = 0;
5185         } else
5186                 cur = NULL;
5187
5188         if (isrt) {
5189                 /*
5190                  * Synchronize by locking the bitmap inode.
5191                  */
5192                 xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
5193                 xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
5194                 xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
5195                 xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
5196         }
5197
5198         extno = 0;
5199         while (bno != (xfs_fileoff_t)-1 && bno >= start && lastx >= 0 &&
5200                (nexts == 0 || extno < nexts)) {
5201                 /*
5202                  * Is the found extent after a hole in which bno lives?
5203                  * Just back up to the previous extent, if so.
5204                  */
5205                 if (got.br_startoff > bno) {
5206                         if (--lastx < 0)
5207                                 break;
5208                         ep = xfs_iext_get_ext(ifp, lastx);
5209                         xfs_bmbt_get_all(ep, &got);
5210                 }
5211                 /*
5212                  * Is the last block of this extent before the range
5213                  * we're supposed to delete?  If so, we're done.
5214                  */
5215                 bno = XFS_FILEOFF_MIN(bno,
5216                         got.br_startoff + got.br_blockcount - 1);
5217                 if (bno < start)
5218                         break;
5219                 /*
5220                  * Then deal with the (possibly delayed) allocated space
5221                  * we found.
5222                  */
5223                 ASSERT(ep != NULL);
5224                 del = got;
5225                 wasdel = isnullstartblock(del.br_startblock);
5226                 if (got.br_startoff < start) {
5227                         del.br_startoff = start;
5228                         del.br_blockcount -= start - got.br_startoff;
5229                         if (!wasdel)
5230                                 del.br_startblock += start - got.br_startoff;
5231                 }
5232                 if (del.br_startoff + del.br_blockcount > bno + 1)
5233                         del.br_blockcount = bno + 1 - del.br_startoff;
5234                 sum = del.br_startblock + del.br_blockcount;
5235                 if (isrt &&
5236                     (mod = do_mod(sum, mp->m_sb.sb_rextsize))) {
5237                         /*
5238                          * Realtime extent not lined up at the end.
5239                          * The extent could have been split into written
5240                          * and unwritten pieces, or we could just be
5241                          * unmapping part of it.  But we can't really
5242                          * get rid of part of a realtime extent.
5243                          */
5244                         if (del.br_state == XFS_EXT_UNWRITTEN ||
5245                             !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5246                                 /*
5247                                  * This piece is unwritten, or we're not
5248                                  * using unwritten extents.  Skip over it.
5249                                  */
5250                                 ASSERT(bno >= mod);
5251                                 bno -= mod > del.br_blockcount ?
5252                                         del.br_blockcount : mod;
5253                                 if (bno < got.br_startoff) {
5254                                         if (--lastx >= 0)
5255                                                 xfs_bmbt_get_all(xfs_iext_get_ext(
5256                                                         ifp, lastx), &got);
5257                                 }
5258                                 continue;
5259                         }
5260                         /*
5261                          * It's written, turn it unwritten.
5262                          * This is better than zeroing it.
5263                          */
5264                         ASSERT(del.br_state == XFS_EXT_NORM);
5265                         ASSERT(tp->t_blk_res > 0);
5266                         /*
5267                          * If this spans a realtime extent boundary,
5268                          * chop it back to the start of the one we end at.
5269                          */
5270                         if (del.br_blockcount > mod) {
5271                                 del.br_startoff += del.br_blockcount - mod;
5272                                 del.br_startblock += del.br_blockcount - mod;
5273                                 del.br_blockcount = mod;
5274                         }
5275                         del.br_state = XFS_EXT_UNWRITTEN;
5276                         error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5277                                         &lastx, &cur, &del, firstblock, dfops,
5278                                         &logflags);
5279                         if (error)
5280                                 goto error0;
5281                         goto nodelete;
5282                 }
5283                 if (isrt && (mod = do_mod(del.br_startblock, mp->m_sb.sb_rextsize))) {
5284                         /*
5285                          * Realtime extent is lined up at the end but not
5286                          * at the front.  We'll get rid of full extents if
5287                          * we can.
5288                          */
5289                         mod = mp->m_sb.sb_rextsize - mod;
5290                         if (del.br_blockcount > mod) {
5291                                 del.br_blockcount -= mod;
5292                                 del.br_startoff += mod;
5293                                 del.br_startblock += mod;
5294                         } else if ((del.br_startoff == start &&
5295                                     (del.br_state == XFS_EXT_UNWRITTEN ||
5296                                      tp->t_blk_res == 0)) ||
5297                                    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
5298                                 /*
5299                                  * Can't make it unwritten.  There isn't
5300                                  * a full extent here so just skip it.
5301                                  */
5302                                 ASSERT(bno >= del.br_blockcount);
5303                                 bno -= del.br_blockcount;
5304                                 if (got.br_startoff > bno) {
5305                                         if (--lastx >= 0) {
5306                                                 ep = xfs_iext_get_ext(ifp,
5307                                                                       lastx);
5308                                                 xfs_bmbt_get_all(ep, &got);
5309                                         }
5310                                 }
5311                                 continue;
5312                         } else if (del.br_state == XFS_EXT_UNWRITTEN) {
5313                                 /*
5314                                  * This one is already unwritten.
5315                                  * It must have a written left neighbor.
5316                                  * Unwrite the killed part of that one and
5317                                  * try again.
5318                                  */
5319                                 ASSERT(lastx > 0);
5320                                 xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5321                                                 lastx - 1), &prev);
5322                                 ASSERT(prev.br_state == XFS_EXT_NORM);
5323                                 ASSERT(!isnullstartblock(prev.br_startblock));
5324                                 ASSERT(del.br_startblock ==
5325                                        prev.br_startblock + prev.br_blockcount);
5326                                 if (prev.br_startoff < start) {
5327                                         mod = start - prev.br_startoff;
5328                                         prev.br_blockcount -= mod;
5329                                         prev.br_startblock += mod;
5330                                         prev.br_startoff = start;
5331                                 }
5332                                 prev.br_state = XFS_EXT_UNWRITTEN;
5333                                 lastx--;
5334                                 error = xfs_bmap_add_extent_unwritten_real(tp,
5335                                                 ip, &lastx, &cur, &prev,
5336                                                 firstblock, dfops, &logflags);
5337                                 if (error)
5338                                         goto error0;
5339                                 goto nodelete;
5340                         } else {
5341                                 ASSERT(del.br_state == XFS_EXT_NORM);
5342                                 del.br_state = XFS_EXT_UNWRITTEN;
5343                                 error = xfs_bmap_add_extent_unwritten_real(tp,
5344                                                 ip, &lastx, &cur, &del,
5345                                                 firstblock, dfops, &logflags);
5346                                 if (error)
5347                                         goto error0;
5348                                 goto nodelete;
5349                         }
5350                 }
5351
5352                 /*
5353                  * If it's the case where the directory code is running
5354                  * with no block reservation, and the deleted block is in
5355                  * the middle of its extent, and the resulting insert
5356                  * of an extent would cause transformation to btree format,
5357                  * then reject it.  The calling code will then swap
5358                  * blocks around instead.
5359                  * We have to do this now, rather than waiting for the
5360                  * conversion to btree format, since the transaction
5361                  * will be dirty.
5362                  */
5363                 if (!wasdel && tp->t_blk_res == 0 &&
5364                     XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
5365                     XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
5366                         XFS_IFORK_MAXEXT(ip, whichfork) &&
5367                     del.br_startoff > got.br_startoff &&
5368                     del.br_startoff + del.br_blockcount <
5369                     got.br_startoff + got.br_blockcount) {
5370                         error = -ENOSPC;
5371                         goto error0;
5372                 }
5373
5374                 /*
5375                  * Unreserve quota and update realtime free space, if
5376                  * appropriate. If delayed allocation, update the inode delalloc
5377                  * counter now and wait to update the sb counters as
5378                  * xfs_bmap_del_extent() might need to borrow some blocks.
5379                  */
5380                 if (wasdel) {
5381                         ASSERT(startblockval(del.br_startblock) > 0);
5382                         if (isrt) {
5383                                 xfs_filblks_t rtexts;
5384
5385                                 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5386                                 do_div(rtexts, mp->m_sb.sb_rextsize);
5387                                 xfs_mod_frextents(mp, (int64_t)rtexts);
5388                                 (void)xfs_trans_reserve_quota_nblks(NULL,
5389                                         ip, -((long)del.br_blockcount), 0,
5390                                         XFS_QMOPT_RES_RTBLKS);
5391                         } else {
5392                                 (void)xfs_trans_reserve_quota_nblks(NULL,
5393                                         ip, -((long)del.br_blockcount), 0,
5394                                         XFS_QMOPT_RES_REGBLKS);
5395                         }
5396                         ip->i_delayed_blks -= del.br_blockcount;
5397                         if (cur)
5398                                 cur->bc_private.b.flags |=
5399                                         XFS_BTCUR_BPRV_WASDEL;
5400                 } else if (cur)
5401                         cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
5402
5403                 error = xfs_bmap_del_extent(ip, tp, &lastx, dfops, cur, &del,
5404                                 &tmp_logflags, whichfork, flags);
5405                 logflags |= tmp_logflags;
5406                 if (error)
5407                         goto error0;
5408
5409                 if (!isrt && wasdel)
5410                         xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
5411
5412                 bno = del.br_startoff - 1;
5413 nodelete:
5414                 /*
5415                  * If not done go on to the next (previous) record.
5416                  */
5417                 if (bno != (xfs_fileoff_t)-1 && bno >= start) {
5418                         if (lastx >= 0) {
5419                                 ep = xfs_iext_get_ext(ifp, lastx);
5420                                 if (xfs_bmbt_get_startoff(ep) > bno) {
5421                                         if (--lastx >= 0)
5422                                                 ep = xfs_iext_get_ext(ifp,
5423                                                                       lastx);
5424                                 }
5425                                 xfs_bmbt_get_all(ep, &got);
5426                         }
5427                         extno++;
5428                 }
5429         }
5430         if (bno == (xfs_fileoff_t)-1 || bno < start || lastx < 0)
5431                 *rlen = 0;
5432         else
5433                 *rlen = bno - start + 1;
5434
5435         /*
5436          * Convert to a btree if necessary.
5437          */
5438         if (xfs_bmap_needs_btree(ip, whichfork)) {
5439                 ASSERT(cur == NULL);
5440                 error = xfs_bmap_extents_to_btree(tp, ip, firstblock, dfops,
5441                         &cur, 0, &tmp_logflags, whichfork);
5442                 logflags |= tmp_logflags;
5443                 if (error)
5444                         goto error0;
5445         }
5446         /*
5447          * transform from btree to extents, give it cur
5448          */
5449         else if (xfs_bmap_wants_extents(ip, whichfork)) {
5450                 ASSERT(cur != NULL);
5451                 error = xfs_bmap_btree_to_extents(tp, ip, cur, &tmp_logflags,
5452                         whichfork);
5453                 logflags |= tmp_logflags;
5454                 if (error)
5455                         goto error0;
5456         }
5457         /*
5458          * transform from extents to local?
5459          */
5460         error = 0;
5461 error0:
5462         /*
5463          * Log everything.  Do this after conversion, there's no point in
5464          * logging the extent records if we've converted to btree format.
5465          */
5466         if ((logflags & xfs_ilog_fext(whichfork)) &&
5467             XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
5468                 logflags &= ~xfs_ilog_fext(whichfork);
5469         else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
5470                  XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
5471                 logflags &= ~xfs_ilog_fbroot(whichfork);
5472         /*
5473          * Log inode even in the error case, if the transaction
5474          * is dirty we'll need to shut down the filesystem.
5475          */
5476         if (logflags)
5477                 xfs_trans_log_inode(tp, ip, logflags);
5478         if (cur) {
5479                 if (!error) {
5480                         *firstblock = cur->bc_private.b.firstblock;
5481                         cur->bc_private.b.allocated = 0;
5482                 }
5483                 xfs_btree_del_cursor(cur,
5484                         error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5485         }
5486         return error;
5487 }
5488
5489 /* Unmap a range of a file. */
5490 int
5491 xfs_bunmapi(
5492         xfs_trans_t             *tp,
5493         struct xfs_inode        *ip,
5494         xfs_fileoff_t           bno,
5495         xfs_filblks_t           len,
5496         int                     flags,
5497         xfs_extnum_t            nexts,
5498         xfs_fsblock_t           *firstblock,
5499         struct xfs_defer_ops    *dfops,
5500         int                     *done)
5501 {
5502         int                     error;
5503
5504         error = __xfs_bunmapi(tp, ip, bno, &len, flags, nexts, firstblock,
5505                         dfops);
5506         *done = (len == 0);
5507         return error;
5508 }
5509
5510 /*
5511  * Determine whether an extent shift can be accomplished by a merge with the
5512  * extent that precedes the target hole of the shift.
5513  */
5514 STATIC bool
5515 xfs_bmse_can_merge(
5516         struct xfs_bmbt_irec    *left,  /* preceding extent */
5517         struct xfs_bmbt_irec    *got,   /* current extent to shift */
5518         xfs_fileoff_t           shift)  /* shift fsb */
5519 {
5520         xfs_fileoff_t           startoff;
5521
5522         startoff = got->br_startoff - shift;
5523
5524         /*
5525          * The extent, once shifted, must be adjacent in-file and on-disk with
5526          * the preceding extent.
5527          */
5528         if ((left->br_startoff + left->br_blockcount != startoff) ||
5529             (left->br_startblock + left->br_blockcount != got->br_startblock) ||
5530             (left->br_state != got->br_state) ||
5531             (left->br_blockcount + got->br_blockcount > MAXEXTLEN))
5532                 return false;
5533
5534         return true;
5535 }
5536
5537 /*
5538  * A bmap extent shift adjusts the file offset of an extent to fill a preceding
5539  * hole in the file. If an extent shift would result in the extent being fully
5540  * adjacent to the extent that currently precedes the hole, we can merge with
5541  * the preceding extent rather than do the shift.
5542  *
5543  * This function assumes the caller has verified a shift-by-merge is possible
5544  * with the provided extents via xfs_bmse_can_merge().
5545  */
STATIC int
xfs_bmse_merge(
	struct xfs_inode		*ip,
	int				whichfork,
	xfs_fileoff_t			shift,		/* shift fsb */
	int				current_ext,	/* idx of gotp */
	struct xfs_bmbt_rec_host	*gotp,		/* extent to shift */
	struct xfs_bmbt_rec_host	*leftp,		/* preceding extent */
	struct xfs_btree_cur		*cur,
	int				*logflags)	/* output */
{
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		left;
	xfs_filblks_t			blockcount;
	int				error, i;
	struct xfs_mount		*mp = ip->i_mount;

	xfs_bmbt_get_all(gotp, &got);
	xfs_bmbt_get_all(leftp, &left);
	/* Length of the merged extent: left extent absorbs got. */
	blockcount = left.br_blockcount + got.br_blockcount;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	/* Caller must have vetted the pair with xfs_bmse_can_merge(). */
	ASSERT(xfs_bmse_can_merge(&left, &got, shift));

	/*
	 * Merge the in-core extents. Note that the host record pointers and
	 * current_ext index are invalid once the extent has been removed via
	 * xfs_iext_remove().
	 */
	xfs_bmbt_set_blockcount(leftp, blockcount);
	xfs_iext_remove(ip, current_ext, 1, 0);

	/*
	 * Update the on-disk extent count, the btree if necessary and log the
	 * inode.
	 */
	XFS_IFORK_NEXT_SET(ip, whichfork,
			   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
	*logflags |= XFS_ILOG_CORE;
	/* No btree cursor means extents format: log the in-core fork only. */
	if (!cur) {
		*logflags |= XFS_ILOG_DEXT;
		return 0;
	}

	/* lookup and remove the extent to merge */
	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
				   got.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	error = xfs_btree_delete(cur, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	/* lookup and update size of the previous extent */
	error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
				   left.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	/* Grow the surviving (left) record to cover the merged range. */
	left.br_blockcount = blockcount;

	return xfs_bmbt_update(cur, left.br_startoff, left.br_startblock,
			       left.br_blockcount, left.br_state);
}
5615
5616 /*
5617  * Shift a single extent.
5618  */
STATIC int
xfs_bmse_shift_one(
	struct xfs_inode		*ip,
	int				whichfork,
	xfs_fileoff_t			offset_shift_fsb,
	int				*current_ext,
	struct xfs_bmbt_rec_host	*gotp,
	struct xfs_btree_cur		*cur,
	int				*logflags,
	enum shift_direction		direction,
	struct xfs_defer_ops		*dfops)
{
	struct xfs_ifork		*ifp;
	struct xfs_mount		*mp;
	xfs_fileoff_t			startoff;
	struct xfs_bmbt_rec_host	*adj_irecp;
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		adj_irec;
	int				error;
	int				i;
	int				total_extents;

	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	/* In-core record count, derived directly from the fork's byte size. */
	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);

	xfs_bmbt_get_all(gotp, &got);

	/* delalloc extents should be prevented by caller */
	XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));

	if (direction == SHIFT_LEFT) {
		startoff = got.br_startoff - offset_shift_fsb;

		/*
		 * Check for merge if we've got an extent to the left,
		 * otherwise make sure there's enough room at the start
		 * of the file for the shift.
		 */
		if (!*current_ext) {
			if (got.br_startoff < offset_shift_fsb)
				return -EINVAL;
			goto update_current_ext;
		}
		/*
		 * grab the left extent and check for a large
		 * enough hole.
		 */
		adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
		xfs_bmbt_get_all(adj_irecp, &adj_irec);

		if (startoff <
		    adj_irec.br_startoff + adj_irec.br_blockcount)
			return -EINVAL;

		/* check whether to merge the extent or shift it down */
		if (xfs_bmse_can_merge(&adj_irec, &got,
				       offset_shift_fsb)) {
			error = xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
					       *current_ext, gotp, adj_irecp,
					       cur, logflags);
			if (error)
				return error;
			/* rmap update below uses the pre-merge extent. */
			adj_irec = got;
			goto update_rmap;
		}
	} else {
		startoff = got.br_startoff + offset_shift_fsb;
		/* nothing to move if this is the last extent */
		if (*current_ext >= (total_extents - 1))
			goto update_current_ext;
		/*
		 * If this is not the last extent in the file, make sure there
		 * is enough room between current extent and next extent for
		 * accommodating the shift.
		 */
		adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
		xfs_bmbt_get_all(adj_irecp, &adj_irec);
		if (startoff + got.br_blockcount > adj_irec.br_startoff)
			return -EINVAL;
		/*
		 * Unlike a left shift (which involves a hole punch),
		 * a right shift does not modify extent neighbors
		 * in any way. We should never find mergeable extents
		 * in this scenario. Check anyways and warn if we
		 * encounter two extents that could be one.
		 */
		if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
			WARN_ON_ONCE(1);
	}
	/*
	 * Increment the extent index for the next iteration, update the start
	 * offset of the in-core extent and update the btree if applicable.
	 */
update_current_ext:
	if (direction == SHIFT_LEFT)
		(*current_ext)++;
	else
		(*current_ext)--;
	xfs_bmbt_set_startoff(gotp, startoff);
	*logflags |= XFS_ILOG_CORE;
	/* Remember the unshifted extent for the rmap update below. */
	adj_irec = got;
	if (!cur) {
		*logflags |= XFS_ILOG_DEXT;
		goto update_rmap;
	}

	/* Btree format: find the record at its old position and rewrite it. */
	error = xfs_bmbt_lookup_eq(cur, got.br_startoff, got.br_startblock,
				   got.br_blockcount, &i);
	if (error)
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);

	got.br_startoff = startoff;
	error = xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
			got.br_blockcount, got.br_state);
	if (error)
		return error;

update_rmap:
	/* update reverse mapping */
	error = xfs_rmap_unmap_extent(mp, dfops, ip, whichfork, &adj_irec);
	if (error)
		return error;
	/* Re-map at the shifted file offset. */
	adj_irec.br_startoff = startoff;
	return xfs_rmap_map_extent(mp, dfops, ip, whichfork, &adj_irec);
}
5746
5747 /*
5748  * Shift extent records to the left/right to cover/create a hole.
5749  *
5750  * The maximum number of extents to be shifted in a single operation is
5751  * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
5752  * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
5753  * is the length by which each extent is shifted. If there is no hole to shift
5754  * the extents into, this will be considered invalid operation and we abort
5755  * immediately.
5756  */
int
xfs_bmap_shift_extents(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		*next_fsb,
	xfs_fileoff_t		offset_shift_fsb,
	int			*done,
	xfs_fileoff_t		stop_fsb,
	xfs_fsblock_t		*firstblock,
	struct xfs_defer_ops	*dfops,
	enum shift_direction	direction,
	int			num_exts)
{
	struct xfs_btree_cur		*cur = NULL;
	struct xfs_bmbt_rec_host	*gotp;
	struct xfs_bmbt_irec            got;
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_ifork		*ifp;
	xfs_extnum_t			nexts = 0;
	xfs_extnum_t			current_ext;
	xfs_extnum_t			total_extents;
	xfs_extnum_t			stop_extent;
	int				error = 0;
	int				whichfork = XFS_DATA_FORK;
	int				logflags = 0;

	/* Only extents or btree format data forks can be shifted. */
	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmap_shift_extents",
				 XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
	/* Only a right shift may start with an unset (NULLFSBLOCK) next_fsb. */
	ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		/* Read in all the extents */
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstblock;
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.flags = 0;
	}

	/*
	 * There may be delalloc extents in the data fork before the range we
	 * are collapsing out, so we cannot use the count of real extents here.
	 * Instead we have to calculate it from the incore fork.
	 */
	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
	if (total_extents == 0) {
		*done = 1;
		goto del_cursor;
	}

	/*
	 * In case of first right shift, we need to initialize next_fsb
	 */
	if (*next_fsb == NULLFSBLOCK) {
		/* Start from the last extent and work backwards. */
		gotp = xfs_iext_get_ext(ifp, total_extents - 1);
		xfs_bmbt_get_all(gotp, &got);
		*next_fsb = got.br_startoff;
		if (stop_fsb > *next_fsb) {
			*done = 1;
			goto del_cursor;
		}
	}

	/* Lookup the extent index at which we have to stop */
	if (direction == SHIFT_RIGHT) {
		gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
		/* Make stop_extent exclusive of shift range */
		stop_extent--;
	} else
		stop_extent = total_extents;

	/*
	 * Look up the extent index for the fsb where we start shifting. We can
	 * henceforth iterate with current_ext as extent list changes are locked
	 * out via ilock.
	 *
	 * gotp can be null in 2 cases: 1) if there are no extents or 2)
	 * *next_fsb lies in a hole beyond which there are no extents. Either
	 * way, we are done.
	 */
	gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
	if (!gotp) {
		*done = 1;
		goto del_cursor;
	}

	/* some sanity checking before we finally start shifting extents */
	if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
	     (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
		error = -EIO;
		goto del_cursor;
	}

	while (nexts++ < num_exts) {
		error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
					   &current_ext, gotp, cur, &logflags,
					   direction, dfops);
		if (error)
			goto del_cursor;
		/*
		 * If there was an extent merge during the shift, the extent
		 * count can change. Update the total and grab the next record.
		 */
		if (direction == SHIFT_LEFT) {
			total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
			stop_extent = total_extents;
		}

		if (current_ext == stop_extent) {
			*done = 1;
			*next_fsb = NULLFSBLOCK;
			break;
		}
		gotp = xfs_iext_get_ext(ifp, current_ext);
	}

	/* Not finished: report where the next call should resume. */
	if (!*done) {
		xfs_bmbt_get_all(gotp, &got);
		*next_fsb = got.br_startoff;
	}

del_cursor:
	if (cur)
		xfs_btree_del_cursor(cur,
			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);

	return error;
}
5907
5908 /*
5909  * Splits an extent into two extents at split_fsb block such that it is
5910  * the first block of the current_ext. @current_ext is a target extent
5911  * to be split. @split_fsb is a block where the extents is split.
5912  * If split_fsb lies in a hole or the first block of extents, just return 0.
5913  */
STATIC int
xfs_bmap_split_extent_at(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	xfs_fileoff_t		split_fsb,
	xfs_fsblock_t		*firstfsb,
	struct xfs_defer_ops	*dfops)
{
	int				whichfork = XFS_DATA_FORK;
	struct xfs_btree_cur		*cur = NULL;
	struct xfs_bmbt_rec_host	*gotp;
	struct xfs_bmbt_irec		got;
	struct xfs_bmbt_irec		new; /* split extent */
	struct xfs_mount		*mp = ip->i_mount;
	struct xfs_ifork		*ifp;
	xfs_fsblock_t			gotblkcnt; /* new block count for got */
	xfs_extnum_t			current_ext;
	int				error = 0;
	int				logflags = 0;
	int				i = 0;

	/* Only extents or btree format data forks can be split. */
	if (unlikely(XFS_TEST_ERROR(
	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
		XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
				 XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		/* Read in all the extents */
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;
	}

	/*
	 * gotp can be null in 2 cases: 1) if there are no extents
	 * or 2) split_fsb lies in a hole beyond which there are
	 * no extents. Either way, we are done.
	 */
	gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
	if (!gotp)
		return 0;

	xfs_bmbt_get_all(gotp, &got);

	/*
	 * Check split_fsb lies in a hole or the start boundary offset
	 * of the extent.
	 */
	if (got.br_startoff >= split_fsb)
		return 0;

	/* The existing extent keeps [startoff, split_fsb); @new takes the rest. */
	gotblkcnt = split_fsb - got.br_startoff;
	new.br_startoff = split_fsb;
	new.br_startblock = got.br_startblock + gotblkcnt;
	new.br_blockcount = got.br_blockcount - gotblkcnt;
	new.br_state = got.br_state;

	if (ifp->if_flags & XFS_IFBROOT) {
		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		cur->bc_private.b.firstblock = *firstfsb;
		cur->bc_private.b.dfops = dfops;
		cur->bc_private.b.flags = 0;
		/* Position the cursor on the extent being split. */
		error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount,
				&i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
	}

	/* Shrink the original in-core extent to the left half. */
	xfs_bmbt_set_blockcount(gotp, gotblkcnt);
	got.br_blockcount = gotblkcnt;

	logflags = XFS_ILOG_CORE;
	if (cur) {
		error = xfs_bmbt_update(cur, got.br_startoff,
				got.br_startblock,
				got.br_blockcount,
				got.br_state);
		if (error)
			goto del_cursor;
	} else
		logflags |= XFS_ILOG_DEXT;

	/* Add new extent */
	current_ext++;
	xfs_iext_insert(ip, current_ext, 1, &new, 0);
	XFS_IFORK_NEXT_SET(ip, whichfork,
			   XFS_IFORK_NEXTENTS(ip, whichfork) + 1);

	if (cur) {
		/* The lookup must NOT find @new yet (i == 0): insert it. */
		error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
				new.br_startblock, new.br_blockcount,
				&i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
		cur->bc_rec.b.br_state = new.br_state;

		error = xfs_btree_insert(cur, &i);
		if (error)
			goto del_cursor;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
	}

	/*
	 * Convert to a btree if necessary.
	 */
	if (xfs_bmap_needs_btree(ip, whichfork)) {
		int tmp_logflags; /* partial log flag return val */

		ASSERT(cur == NULL);
		error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, dfops,
				&cur, 0, &tmp_logflags, whichfork);
		logflags |= tmp_logflags;
	}

del_cursor:
	if (cur) {
		cur->bc_private.b.allocated = 0;
		xfs_btree_del_cursor(cur,
				error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	}

	if (logflags)
		xfs_trans_log_inode(tp, ip, logflags);
	return error;
}
6051
6052 int
6053 xfs_bmap_split_extent(
6054         struct xfs_inode        *ip,
6055         xfs_fileoff_t           split_fsb)
6056 {
6057         struct xfs_mount        *mp = ip->i_mount;
6058         struct xfs_trans        *tp;
6059         struct xfs_defer_ops    dfops;
6060         xfs_fsblock_t           firstfsb;
6061         int                     error;
6062
6063         error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
6064                         XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
6065         if (error)
6066                 return error;
6067
6068         xfs_ilock(ip, XFS_ILOCK_EXCL);
6069         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
6070
6071         xfs_defer_init(&dfops, &firstfsb);
6072
6073         error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
6074                         &firstfsb, &dfops);
6075         if (error)
6076                 goto out;
6077
6078         error = xfs_defer_finish(&tp, &dfops, NULL);
6079         if (error)
6080                 goto out;
6081
6082         return xfs_trans_commit(tp);
6083
6084 out:
6085         xfs_defer_cancel(&dfops);
6086         xfs_trans_cancel(tp);
6087         return error;
6088 }
6089
6090 /* Deferred mapping is only for real extents in the data fork. */
6091 static bool
6092 xfs_bmap_is_update_needed(
6093         struct xfs_bmbt_irec    *bmap)
6094 {
6095         return  bmap->br_startblock != HOLESTARTBLOCK &&
6096                 bmap->br_startblock != DELAYSTARTBLOCK;
6097 }
6098
6099 /* Record a bmap intent. */
6100 static int
6101 __xfs_bmap_add(
6102         struct xfs_mount                *mp,
6103         struct xfs_defer_ops            *dfops,
6104         enum xfs_bmap_intent_type       type,
6105         struct xfs_inode                *ip,
6106         int                             whichfork,
6107         struct xfs_bmbt_irec            *bmap)
6108 {
6109         int                             error;
6110         struct xfs_bmap_intent          *bi;
6111
6112         trace_xfs_bmap_defer(mp,
6113                         XFS_FSB_TO_AGNO(mp, bmap->br_startblock),
6114                         type,
6115                         XFS_FSB_TO_AGBNO(mp, bmap->br_startblock),
6116                         ip->i_ino, whichfork,
6117                         bmap->br_startoff,
6118                         bmap->br_blockcount,
6119                         bmap->br_state);
6120
6121         bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS);
6122         INIT_LIST_HEAD(&bi->bi_list);
6123         bi->bi_type = type;
6124         bi->bi_owner = ip;
6125         bi->bi_whichfork = whichfork;
6126         bi->bi_bmap = *bmap;
6127
6128         error = xfs_defer_join(dfops, bi->bi_owner);
6129         if (error) {
6130                 kmem_free(bi);
6131                 return error;
6132         }
6133
6134         xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list);
6135         return 0;
6136 }
6137
6138 /* Map an extent into a file. */
6139 int
6140 xfs_bmap_map_extent(
6141         struct xfs_mount        *mp,
6142         struct xfs_defer_ops    *dfops,
6143         struct xfs_inode        *ip,
6144         struct xfs_bmbt_irec    *PREV)
6145 {
6146         if (!xfs_bmap_is_update_needed(PREV))
6147                 return 0;
6148
6149         return __xfs_bmap_add(mp, dfops, XFS_BMAP_MAP, ip,
6150                         XFS_DATA_FORK, PREV);
6151 }
6152
6153 /* Unmap an extent out of a file. */
6154 int
6155 xfs_bmap_unmap_extent(
6156         struct xfs_mount        *mp,
6157         struct xfs_defer_ops    *dfops,
6158         struct xfs_inode        *ip,
6159         struct xfs_bmbt_irec    *PREV)
6160 {
6161         if (!xfs_bmap_is_update_needed(PREV))
6162                 return 0;
6163
6164         return __xfs_bmap_add(mp, dfops, XFS_BMAP_UNMAP, ip,
6165                         XFS_DATA_FORK, PREV);
6166 }
6167
6168 /*
6169  * Process one of the deferred bmap operations.  We pass back the
6170  * btree cursor to maintain our lock on the bmapbt between calls.
6171  */
6172 int
6173 xfs_bmap_finish_one(
6174         struct xfs_trans                *tp,
6175         struct xfs_defer_ops            *dfops,
6176         struct xfs_inode                *ip,
6177         enum xfs_bmap_intent_type       type,
6178         int                             whichfork,
6179         xfs_fileoff_t                   startoff,
6180         xfs_fsblock_t                   startblock,
6181         xfs_filblks_t                   blockcount,
6182         xfs_exntst_t                    state)
6183 {
6184         struct xfs_bmbt_irec            bmap;
6185         int                             nimaps = 1;
6186         xfs_fsblock_t                   firstfsb;
6187         int                             flags = XFS_BMAPI_REMAP;
6188         int                             done;
6189         int                             error = 0;
6190
6191         bmap.br_startblock = startblock;
6192         bmap.br_startoff = startoff;
6193         bmap.br_blockcount = blockcount;
6194         bmap.br_state = state;
6195
6196         trace_xfs_bmap_deferred(tp->t_mountp,
6197                         XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type,
6198                         XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
6199                         ip->i_ino, whichfork, startoff, blockcount, state);
6200
6201         if (whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK)
6202                 return -EFSCORRUPTED;
6203         if (whichfork == XFS_ATTR_FORK)
6204                 flags |= XFS_BMAPI_ATTRFORK;
6205
6206         if (XFS_TEST_ERROR(false, tp->t_mountp,
6207                         XFS_ERRTAG_BMAP_FINISH_ONE,
6208                         XFS_RANDOM_BMAP_FINISH_ONE))
6209                 return -EIO;
6210
6211         switch (type) {
6212         case XFS_BMAP_MAP:
6213                 firstfsb = bmap.br_startblock;
6214                 error = xfs_bmapi_write(tp, ip, bmap.br_startoff,
6215                                         bmap.br_blockcount, flags, &firstfsb,
6216                                         bmap.br_blockcount, &bmap, &nimaps,
6217                                         dfops);
6218                 break;
6219         case XFS_BMAP_UNMAP:
6220                 error = xfs_bunmapi(tp, ip, bmap.br_startoff,
6221                                 bmap.br_blockcount, flags, 1, &firstfsb,
6222                                 dfops, &done);
6223                 ASSERT(done);
6224                 break;
6225         default:
6226                 ASSERT(0);
6227                 error = -EFSCORRUPTED;
6228         }
6229
6230         return error;
6231 }