Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5839135..2af0dda 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -15,6 +15,7 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <linux/iomap.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
@@ -22,6 +23,7 @@
 #include "xfs_log_format.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
+#include "xfs_defer.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
 #include "xfs_bmap_btree.h"
@@ -127,7 +129,7 @@ xfs_iomap_write_direct(
        int             quota_flag;
        int             rt;
        xfs_trans_t     *tp;
-       xfs_bmap_free_t free_list;
+       struct xfs_defer_ops dfops;
        uint            qblocks, resblks, resrtextents;
        int             error;
        int             lockmode;
@@ -230,18 +232,18 @@ xfs_iomap_write_direct(
         * From this point onwards we overwrite the imap pointer that the
         * caller gave to us.
         */
-       xfs_bmap_init(&free_list, &firstfsb);
+       xfs_defer_init(&dfops, &firstfsb);
        nimaps = 1;
        error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
                                bmapi_flags, &firstfsb, resblks, imap,
-                               &nimaps, &free_list);
+                               &nimaps, &dfops);
        if (error)
                goto out_bmap_cancel;
 
        /*
         * Complete the transaction
         */
-       error = xfs_bmap_finish(&tp, &free_list, NULL);
+       error = xfs_defer_finish(&tp, &dfops, NULL);
        if (error)
                goto out_bmap_cancel;
 
@@ -265,7 +267,7 @@ out_unlock:
        return error;
 
 out_bmap_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
        xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
 out_trans_cancel:
        xfs_trans_cancel(tp);
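
Aside, not part of the patch: the hunks above replace the old xfs_bmap_free_t bookkeeping with the generic deferred-operations API. The condensed sketch below pulls those scattered changes into one place; every call in it mirrors the patch, while the example_defer_pattern() wrapper and its argument list are illustrative only, the transaction and inode being assumed to be set up by the caller as in xfs_iomap_write_direct(), with the usual xfs_*.h includes of this file.

/*
 * Sketch of the deferred-ops pattern used by the hunks above.
 */
static int
example_defer_pattern(
        struct xfs_trans        *tp,
        struct xfs_inode        *ip,
        xfs_fileoff_t           offset_fsb,
        xfs_filblks_t           count_fsb,
        int                     bmapi_flags,
        uint                    resblks,
        struct xfs_bmbt_irec    *imap,
        int                     *nimaps)
{
        struct xfs_defer_ops    dfops;
        xfs_fsblock_t           firstfsb;
        int                     error;

        /* Begin collecting deferred work for this transaction. */
        xfs_defer_init(&dfops, &firstfsb);

        /* Block mapping queues its deferred items (e.g. extent frees). */
        error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags,
                                &firstfsb, resblks, imap, nimaps, &dfops);
        if (error)
                goto out_cancel;

        /* Roll the transaction and process every queued deferred item. */
        error = xfs_defer_finish(&tp, &dfops, NULL);
        if (error)
                goto out_cancel;

        return xfs_trans_commit(tp);

out_cancel:
        /* Throw away queued-but-unfinished items, then kill the transaction. */
        xfs_defer_cancel(&dfops);
        xfs_trans_cancel(tp);
        return error;
}
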
@@ -684,7 +686,7 @@ xfs_iomap_write_allocate(
        xfs_fileoff_t   offset_fsb, last_block;
        xfs_fileoff_t   end_fsb, map_start_fsb;
        xfs_fsblock_t   first_block;
-       xfs_bmap_free_t free_list;
+       struct xfs_defer_ops    dfops;
        xfs_filblks_t   count_fsb;
        xfs_trans_t     *tp;
        int             nimaps;
@@ -713,12 +715,16 @@ xfs_iomap_write_allocate(
                 * is in the delayed allocation extent on which we sit
                 * but before our buffer starts.
                 */
-
                nimaps = 0;
                while (nimaps == 0) {
                        nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
-
-                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres,
+                       /*
+                        * We have already reserved space for the extent and any
+                        * indirect blocks when creating the delalloc extent,
+                        * there is no need to reserve space in this transaction
+                        * again.
+                        */
+                       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0,
                                        0, XFS_TRANS_RESERVE, &tp);
                        if (error)
                                return error;
@@ -726,7 +732,7 @@ xfs_iomap_write_allocate(
                        xfs_ilock(ip, XFS_ILOCK_EXCL);
                        xfs_trans_ijoin(tp, ip, 0);
 
-                       xfs_bmap_init(&free_list, &first_block);
+                       xfs_defer_init(&dfops, &first_block);
 
                        /*
                         * it is possible that the extents have changed since
@@ -782,11 +788,11 @@ xfs_iomap_write_allocate(
                        error = xfs_bmapi_write(tp, ip, map_start_fsb,
                                                count_fsb, 0, &first_block,
                                                nres, imap, &nimaps,
-                                               &free_list);
+                                               &dfops);
                        if (error)
                                goto trans_cancel;
 
-                       error = xfs_bmap_finish(&tp, &free_list, NULL);
+                       error = xfs_defer_finish(&tp, &dfops, NULL);
                        if (error)
                                goto trans_cancel;
 
@@ -820,7 +826,7 @@ xfs_iomap_write_allocate(
        }
 
 trans_cancel:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
        xfs_trans_cancel(tp);
 error0:
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -841,7 +847,7 @@ xfs_iomap_write_unwritten(
        int             nimaps;
        xfs_trans_t     *tp;
        xfs_bmbt_irec_t imap;
-       xfs_bmap_free_t free_list;
+       struct xfs_defer_ops dfops;
        xfs_fsize_t     i_size;
        uint            resblks;
        int             error;
@@ -885,11 +891,11 @@ xfs_iomap_write_unwritten(
                /*
                 * Modify the unwritten extent state of the buffer.
                 */
-               xfs_bmap_init(&free_list, &firstfsb);
+               xfs_defer_init(&dfops, &firstfsb);
                nimaps = 1;
                error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
                                        XFS_BMAPI_CONVERT, &firstfsb, resblks,
-                                       &imap, &nimaps, &free_list);
+                                       &imap, &nimaps, &dfops);
                if (error)
                        goto error_on_bmapi_transaction;
 
@@ -908,7 +914,7 @@ xfs_iomap_write_unwritten(
                        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
                }
 
-               error = xfs_bmap_finish(&tp, &free_list, NULL);
+               error = xfs_defer_finish(&tp, &dfops, NULL);
                if (error)
                        goto error_on_bmapi_transaction;
 
@@ -935,8 +941,217 @@ xfs_iomap_write_unwritten(
        return 0;
 
 error_on_bmapi_transaction:
-       xfs_bmap_cancel(&free_list);
+       xfs_defer_cancel(&dfops);
        xfs_trans_cancel(tp);
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
 }
+
+void
+xfs_bmbt_to_iomap(
+       struct xfs_inode        *ip,
+       struct iomap            *iomap,
+       struct xfs_bmbt_irec    *imap)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+
+       if (imap->br_startblock == HOLESTARTBLOCK) {
+               iomap->blkno = IOMAP_NULL_BLOCK;
+               iomap->type = IOMAP_HOLE;
+       } else if (imap->br_startblock == DELAYSTARTBLOCK) {
+               iomap->blkno = IOMAP_NULL_BLOCK;
+               iomap->type = IOMAP_DELALLOC;
+       } else {
+               iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
+               if (imap->br_state == XFS_EXT_UNWRITTEN)
+                       iomap->type = IOMAP_UNWRITTEN;
+               else
+                       iomap->type = IOMAP_MAPPED;
+       }
+       iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
+       iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
+       iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
+}
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+       return !nimaps ||
+               imap->br_startblock == HOLESTARTBLOCK ||
+               imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+       struct inode            *inode,
+       loff_t                  offset,
+       loff_t                  length,
+       unsigned                flags,
+       struct iomap            *iomap)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_bmbt_irec    imap;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     nimaps = 1, error = 0;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+       ASSERT(offset <= mp->m_super->s_maxbytes);
+       if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+               length = mp->m_super->s_maxbytes - offset;
+       offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+       error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+                              &nimaps, XFS_BMAPI_ENTIRE);
+       if (error) {
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+               return error;
+       }
+
+       if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+               /*
+                * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+                * pages to keep the chunks of work we do here somewhat symmetric
+                * with the work writeback does. This is a completely arbitrary
+                * number pulled out of thin air as a best guess for initial
+                * testing.
+                *
+                * Note that the value needs to be less than 32 bits wide until
+                * the lower level functions are updated.
+                */
+               length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+               if (xfs_get_extsz_hint(ip)) {
+                       /*
+                        * xfs_iomap_write_direct() expects the shared lock. It
+                        * is unlocked on return.
+                        */
+                       xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+                       error = xfs_iomap_write_direct(ip, offset, length, &imap,
+                                       nimaps);
+               } else {
+                       error = xfs_iomap_write_delay(ip, offset, length, &imap);
+                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+               }
+
+               if (error)
+                       return error;
+
+               trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+       } else {
+               ASSERT(nimaps);
+
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+               trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+       }
+
+       xfs_bmbt_to_iomap(ip, iomap, &imap);
+       return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+       struct xfs_inode        *ip,
+       loff_t                  offset,
+       loff_t                  length,
+       ssize_t                 written)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           start_fsb;
+       xfs_fileoff_t           end_fsb;
+       int                     error = 0;
+
+       start_fsb = XFS_B_TO_FSB(mp, offset + written);
+       end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+       /*
+        * Trim back delalloc blocks if we didn't manage to write the whole
+        * range reserved.
+        *
+        * We don't need to care about racing delalloc as we hold i_mutex
+        * across the reserve/allocate/unreserve calls. If there are delalloc
+        * blocks in the range, they are ours.
+        */
+       if (start_fsb < end_fsb) {
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+               error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+                                              end_fsb - start_fsb);
+               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+               if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+                       xfs_alert(mp, "%s: unable to clean up ino %lld",
+                               __func__, ip->i_ino);
+                       return error;
+               }
+       }
+
+       return 0;
+}
+
+static int
+xfs_file_iomap_end(
+       struct inode            *inode,
+       loff_t                  offset,
+       loff_t                  length,
+       ssize_t                 written,
+       unsigned                flags,
+       struct iomap            *iomap)
+{
+       if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+               return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+                               length, written);
+       return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+       .iomap_begin            = xfs_file_iomap_begin,
+       .iomap_end              = xfs_file_iomap_end,
+};
+
+static int
+xfs_xattr_iomap_begin(
+       struct inode            *inode,
+       loff_t                  offset,
+       loff_t                  length,
+       unsigned                flags,
+       struct iomap            *iomap)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb = XFS_B_TO_FSBT(mp, offset);
+       xfs_fileoff_t           end_fsb = XFS_B_TO_FSB(mp, offset + length);
+       struct xfs_bmbt_irec    imap;
+       int                     nimaps = 1, error = 0;
+       unsigned                lockmode;
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       lockmode = xfs_ilock_attr_map_shared(ip);
+
+       /* if there is no attribute fork or no attr extents, return ENOENT */
+       if (!XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) {
+               error = -ENOENT;
+               goto out_unlock;
+       }
+
+       ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
+       error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+                              &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK);
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+
+       if (!error) {
+               ASSERT(nimaps);
+               xfs_bmbt_to_iomap(ip, iomap, &imap);
+       }
+
+       return error;
+}
+
+struct iomap_ops xfs_xattr_iomap_ops = {
+       .iomap_begin            = xfs_xattr_iomap_begin,
+};
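
Aside, not part of the patch: the two iomap_ops tables defined above only take effect once they are passed to the generic helpers in fs/iomap.c. The sketch below shows the expected wiring, assuming the iomap_file_buffered_write() and iomap_fiemap() interfaces of the same kernel series; the example_* wrappers are hypothetical stand-ins for the real call sites in xfs_file.c and xfs_iops.c.

#include <linux/fs.h>
#include <linux/uio.h>
#include <linux/iomap.h>

/* Declared in xfs_iomap.h in this series; repeated here so the sketch
 * stands alone. */
extern struct iomap_ops xfs_iomap_ops;
extern struct iomap_ops xfs_xattr_iomap_ops;

/*
 * Buffered writes: the iomap core calls ->iomap_begin() to get (or
 * allocate) a mapping for each chunk, copies in the user data, then
 * calls ->iomap_end(), which for delalloc mappings punches out blocks
 * reserved beyond what was actually written (see
 * xfs_file_iomap_end_delalloc() above).
 */
static ssize_t
example_buffered_write(
        struct kiocb            *iocb,
        struct iov_iter         *from)
{
        return iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
}

/*
 * FIEMAP: data fork mappings are reported through xfs_iomap_ops,
 * attribute fork mappings through xfs_xattr_iomap_ops.
 */
static int
example_fiemap(
        struct inode            *inode,
        struct fiemap_extent_info *fieinfo,
        u64                     start,
        u64                     length,
        bool                    attr_fork)
{
        return iomap_fiemap(inode, fieinfo, start, length,
                        attr_fork ? &xfs_xattr_iomap_ops : &xfs_iomap_ops);
}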