diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 7575cfc..3e57a56 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,6 +31,7 @@
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
 #include "xfs_bmap_btree.h"
+#include "xfs_reflink.h"
 #include <linux/gfp.h>
 #include <linux/mpage.h>
 #include <linux/pagevec.h>
@@ -39,6 +40,7 @@
 /* flags for direct write completions */
 #define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
 #define XFS_DIO_FLAG_APPEND    (1 << 1)
+#define XFS_DIO_FLAG_COW       (1 << 2)
 
 /*
  * structure owned by writepages passed to individual writepage calls
@@ -200,7 +202,7 @@ xfs_setfilesize_trans_alloc(
  * Update on-disk file size now that data has been written to disk.
  */
 STATIC int
-xfs_setfilesize(
+__xfs_setfilesize(
        struct xfs_inode        *ip,
        struct xfs_trans        *tp,
        xfs_off_t               offset,
@@ -225,6 +227,23 @@ xfs_setfilesize(
        return xfs_trans_commit(tp);
 }
 
+int
+xfs_setfilesize(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       size_t                  size)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       int                     error;
+
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+       if (error)
+               return error;
+
+       return __xfs_setfilesize(ip, tp, offset, size);
+}
+
 STATIC int
 xfs_setfilesize_ioend(
        struct xfs_ioend        *ioend,
@@ -247,7 +266,7 @@ xfs_setfilesize_ioend(
                return error;
        }
 
-       return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
+       return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
 }
 
 /*
@@ -269,6 +288,25 @@ xfs_end_io(
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                error = -EIO;
 
+       /*
+        * For a CoW extent, we need to move the mapping from the CoW fork
+        * to the data fork.  If instead an error happened, just dump the
+        * new blocks.
+        */
+       if (ioend->io_type == XFS_IO_COW) {
+               if (error)
+                       goto done;
+               if (ioend->io_bio->bi_error) {
+                       error = xfs_reflink_cancel_cow_range(ip,
+                                       ioend->io_offset, ioend->io_size);
+                       goto done;
+               }
+               error = xfs_reflink_end_cow(ip, ioend->io_offset,
+                               ioend->io_size);
+               if (error)
+                       goto done;
+       }
+
        /*
         * For unwritten extents we need to issue transactions to convert a
         * range to normal written extents after the data I/O has finished.
@@ -284,7 +322,8 @@ xfs_end_io(
        } else if (ioend->io_append_trans) {
                error = xfs_setfilesize_ioend(ioend, error);
        } else {
-               ASSERT(!xfs_ioend_is_append(ioend));
+               ASSERT(!xfs_ioend_is_append(ioend) ||
+                      ioend->io_type == XFS_IO_COW);
        }
 
 done:
@@ -298,7 +337,7 @@ xfs_end_bio(
        struct xfs_ioend        *ioend = bio->bi_private;
        struct xfs_mount        *mp = XFS_I(ioend->io_inode)->i_mount;
 
-       if (ioend->io_type == XFS_IO_UNWRITTEN)
+       if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
                queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
        else if (ioend->io_append_trans)
                queue_work(mp->m_data_workqueue, &ioend->io_work);
@@ -324,6 +363,7 @@ xfs_map_blocks(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
+       ASSERT(type != XFS_IO_COW);
        if (type == XFS_IO_UNWRITTEN)
                bmapi_flags |= XFS_BMAPI_IGSTATE;
 
@@ -338,6 +378,13 @@ xfs_map_blocks(
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
        error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
                                imap, &nimaps, bmapi_flags);
+       /*
+        * Truncate an overwrite extent if there's a pending CoW
+        * reservation before the end of this extent.  This forces us
+        * to come back to writepage to take care of the CoW.
+        */
+       if (nimaps && type == XFS_IO_OVERWRITE)
+               xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
        if (error)
@@ -345,7 +392,8 @@ xfs_map_blocks(
 
        if (type == XFS_IO_DELALLOC &&
            (!nimaps || isnullstartblock(imap->br_startblock))) {
-               error = xfs_iomap_write_allocate(ip, offset, imap);
+               error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
+                               imap);
                if (!error)
                        trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
                return error;
@@ -720,6 +768,56 @@ out_invalidate:
        return;
 }
 
+static int
+xfs_map_cow(
+       struct xfs_writepage_ctx *wpc,
+       struct inode            *inode,
+       loff_t                  offset,
+       unsigned int            *new_type)
+{
+       struct xfs_inode        *ip = XFS_I(inode);
+       struct xfs_bmbt_irec    imap;
+       bool                    is_cow = false, need_alloc = false;
+       int                     error;
+
+       /*
+        * If we already have a valid COW mapping, keep using it.
+        */
+       if (wpc->io_type == XFS_IO_COW) {
+               wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
+               if (wpc->imap_valid) {
+                       *new_type = XFS_IO_COW;
+                       return 0;
+               }
+       }
+
+       /*
+        * Else we need to check if there is a COW mapping at this offset.
+        */
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+       if (!is_cow)
+               return 0;
+
+       /*
+        * And if the COW mapping has a delayed extent here we need to
+        * allocate real space for it now.
+        */
+       if (need_alloc) {
+               error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
+                               &imap);
+               if (error)
+                       return error;
+       }
+
+       wpc->io_type = *new_type = XFS_IO_COW;
+       wpc->imap_valid = true;
+       wpc->imap = imap;
+       return 0;
+}
+
 /*
  * We implement an immediate ioend submission policy here to avoid needing to
  * chain multiple ioends and hence nest mempool allocations which can violate
@@ -752,6 +850,7 @@ xfs_writepage_map(
        int                     error = 0;
        int                     count = 0;
        int                     uptodate = 1;
+       unsigned int            new_type;
 
        bh = head = page_buffers(page);
        offset = page_offset(page);
@@ -772,22 +871,13 @@ xfs_writepage_map(
                        continue;
                }
 
-               if (buffer_unwritten(bh)) {
-                       if (wpc->io_type != XFS_IO_UNWRITTEN) {
-                               wpc->io_type = XFS_IO_UNWRITTEN;
-                               wpc->imap_valid = false;
-                       }
-               } else if (buffer_delay(bh)) {
-                       if (wpc->io_type != XFS_IO_DELALLOC) {
-                               wpc->io_type = XFS_IO_DELALLOC;
-                               wpc->imap_valid = false;
-                       }
-               } else if (buffer_uptodate(bh)) {
-                       if (wpc->io_type != XFS_IO_OVERWRITE) {
-                               wpc->io_type = XFS_IO_OVERWRITE;
-                               wpc->imap_valid = false;
-                       }
-               } else {
+               if (buffer_unwritten(bh))
+                       new_type = XFS_IO_UNWRITTEN;
+               else if (buffer_delay(bh))
+                       new_type = XFS_IO_DELALLOC;
+               else if (buffer_uptodate(bh))
+                       new_type = XFS_IO_OVERWRITE;
+               else {
                        if (PageUptodate(page))
                                ASSERT(buffer_mapped(bh));
                        /*
@@ -800,6 +890,17 @@ xfs_writepage_map(
                        continue;
                }
 
+               if (xfs_is_reflink_inode(XFS_I(inode))) {
+                       error = xfs_map_cow(wpc, inode, offset, &new_type);
+                       if (error)
+                               goto out;
+               }
+
+               if (wpc->io_type != new_type) {
+                       wpc->io_type = new_type;
+                       wpc->imap_valid = false;
+               }
+
                if (wpc->imap_valid)
                        wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
                                                         offset);
@@ -1090,18 +1191,24 @@ xfs_map_direct(
        struct inode            *inode,
        struct buffer_head      *bh_result,
        struct xfs_bmbt_irec    *imap,
-       xfs_off_t               offset)
+       xfs_off_t               offset,
+       bool                    is_cow)
 {
        uintptr_t               *flags = (uintptr_t *)&bh_result->b_private;
        xfs_off_t               size = bh_result->b_size;
 
        trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
-               ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
+               ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : is_cow ? XFS_IO_COW :
+               XFS_IO_OVERWRITE, imap);
 
        if (ISUNWRITTEN(imap)) {
                *flags |= XFS_DIO_FLAG_UNWRITTEN;
                set_buffer_defer_completion(bh_result);
-       } else if (offset + size > i_size_read(inode) || offset + size < 0) {
+       } else if (is_cow) {
+               *flags |= XFS_DIO_FLAG_COW;
+               set_buffer_defer_completion(bh_result);
+       }
+       if (offset + size > i_size_read(inode) || offset + size < 0) {
                *flags |= XFS_DIO_FLAG_APPEND;
                set_buffer_defer_completion(bh_result);
        }
@@ -1147,6 +1254,44 @@ xfs_map_trim_size(
        bh_result->b_size = mapping_size;
 }
 
+/* Bounce unaligned directio writes to the page cache. */
+static int
+xfs_bounce_unaligned_dio_write(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           offset_fsb,
+       struct xfs_bmbt_irec    *imap)
+{
+       struct xfs_bmbt_irec    irec;
+       xfs_fileoff_t           delta;
+       bool                    shared;
+       bool                    x;
+       int                     error;
+
+       irec = *imap;
+       if (offset_fsb > irec.br_startoff) {
+               delta = offset_fsb - irec.br_startoff;
+               irec.br_blockcount -= delta;
+               irec.br_startblock += delta;
+               irec.br_startoff = offset_fsb;
+       }
+       error = xfs_reflink_trim_around_shared(ip, &irec, &shared, &x);
+       if (error)
+               return error;
+
+       /*
+        * We're here because we're trying to do a directio write to a
+        * region that isn't aligned to a filesystem block.  If any part
+        * of the extent is shared, fall back to buffered mode to handle
+        * the RMW.  This is done by returning -EREMCHG ("remote addr
+        * changed"), which is caught further up the call stack.
+        */
+       if (shared) {
+               trace_xfs_reflink_bounce_dio_write(ip, imap);
+               return -EREMCHG;
+       }
+       return 0;
+}
+
 STATIC int
 __xfs_get_blocks(
        struct inode            *inode,
@@ -1166,6 +1311,8 @@ __xfs_get_blocks(
        xfs_off_t               offset;
        ssize_t                 size;
        int                     new = 0;
+       bool                    is_cow = false;
+       bool                    need_alloc = false;
 
        BUG_ON(create && !direct);
 
@@ -1191,8 +1338,26 @@ __xfs_get_blocks(
        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-       error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-                               &imap, &nimaps, XFS_BMAPI_ENTIRE);
+       if (create && direct && xfs_is_reflink_inode(ip))
+               is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap,
+                                       &need_alloc);
+       if (!is_cow) {
+               error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+                                       &imap, &nimaps, XFS_BMAPI_ENTIRE);
+               /*
+                * Truncate an overwrite extent if there's a pending CoW
+                * reservation before the end of this extent.  This
+                * forces us to come back to get_blocks to take care of
+                * the CoW.
+                */
+               if (create && direct && nimaps &&
+                   imap.br_startblock != HOLESTARTBLOCK &&
+                   imap.br_startblock != DELAYSTARTBLOCK &&
+                   !ISUNWRITTEN(&imap))
+                       xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb,
+                                       &imap);
+       }
+       ASSERT(!need_alloc);
        if (error)
                goto out_unlock;
 
@@ -1244,6 +1409,13 @@ __xfs_get_blocks(
        if (imap.br_startblock != HOLESTARTBLOCK &&
            imap.br_startblock != DELAYSTARTBLOCK &&
            (create || !ISUNWRITTEN(&imap))) {
+               if (create && direct && !is_cow) {
+                       error = xfs_bounce_unaligned_dio_write(ip, offset_fsb,
+                                       &imap);
+                       if (error)
+                               return error;
+               }
+
                xfs_map_buffer(inode, bh_result, &imap, offset);
                if (ISUNWRITTEN(&imap))
                        set_buffer_unwritten(bh_result);
@@ -1252,7 +1424,8 @@ __xfs_get_blocks(
                        if (dax_fault)
                                ASSERT(!ISUNWRITTEN(&imap));
                        else
-                               xfs_map_direct(inode, bh_result, &imap, offset);
+                               xfs_map_direct(inode, bh_result, &imap, offset,
+                                               is_cow);
                }
        }
 
@@ -1336,13 +1509,12 @@ xfs_end_io_direct_write(
 {
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
        uintptr_t               flags = (uintptr_t)private;
        int                     error = 0;
 
        trace_xfs_end_io_direct_write(ip, offset, size);
 
-       if (XFS_FORCED_SHUTDOWN(mp))
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                return -EIO;
 
        if (size <= 0)
@@ -1375,19 +1547,17 @@ xfs_end_io_direct_write(
                i_size_write(inode, offset + size);
        spin_unlock(&ip->i_flags_lock);
 
+       if (flags & XFS_DIO_FLAG_COW)
+               error = xfs_reflink_end_cow(ip, offset, size);
        if (flags & XFS_DIO_FLAG_UNWRITTEN) {
                trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
 
                error = xfs_iomap_write_unwritten(ip, offset, size);
-       } else if (flags & XFS_DIO_FLAG_APPEND) {
-               struct xfs_trans *tp;
-
+       }
+       if (flags & XFS_DIO_FLAG_APPEND) {
                trace_xfs_end_io_direct_write_append(ip, offset, size);
 
-               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
-                               &tp);
-               if (!error)
-                       error = xfs_setfilesize(ip, tp, offset, size);
+               error = xfs_setfilesize(ip, offset, size);
        }
 
        return error;
@@ -1414,6 +1584,17 @@ xfs_vm_bmap(
 
        trace_xfs_vm_bmap(XFS_I(inode));
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
+
+       /*
+        * The swap code (ab-)uses ->bmap to get a block mapping and then
+        * bypasses the file system for actual I/O.  We really can't allow
+        * that on reflink inodes, so we have to skip out here.  And yes,
+        * 0 is the magic code for a bmap error.
+        */
+       if (xfs_is_reflink_inode(ip)) {
+               xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+               return 0;
+       }
        filemap_write_and_wait(mapping);
        xfs_iunlock(ip, XFS_IOLOCK_SHARED);
        return generic_block_bmap(mapping, block, xfs_get_blocks);
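
Below is a minimal userspace sketch (not part of the patch above) of the scenario the new CoW paths handle: clone a file with the FICLONE ioctl, then overwrite part of the clone with O_DIRECT.  On an XFS filesystem with reflink enabled, a direct write to shared blocks is routed through the XFS_IO_COW / XFS_DIO_FLAG_COW completion handling added in this diff.  The file names, the 4096-byte block size, and the abbreviated error handling are illustrative assumptions only.

/* clone_then_dio_write.c - illustrative only, not part of the kernel patch */
#define _GNU_SOURCE             /* for O_DIRECT */
#include <fcntl.h>
#include <linux/fs.h>           /* FICLONE */
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define BLKSZ   4096            /* assumed fs block size, also satisfies O_DIRECT alignment */

int main(void)
{
        void    *buf;
        int     src, dst;

        if (posix_memalign(&buf, BLKSZ, BLKSZ))
                return 1;
        memset(buf, 'a', BLKSZ);

        /* Create the source file and give it one block of data. */
        src = open("orig", O_RDWR | O_CREAT | O_TRUNC, 0644);
        if (src < 0 || pwrite(src, buf, BLKSZ, 0) != BLKSZ || fsync(src))
                return 1;

        /* Share all blocks of "orig" with "clone" (reflink copy). */
        dst = open("clone", O_RDWR | O_CREAT | O_TRUNC, 0644);
        if (dst < 0 || ioctl(dst, FICLONE, src))
                return 1;
        close(dst);

        /*
         * Direct write into the shared range: the filesystem must allocate
         * new blocks (copy on write) rather than writing in place, which is
         * what the XFS_IO_COW handling in this diff implements.
         */
        dst = open("clone", O_RDWR | O_DIRECT);
        memset(buf, 'b', BLKSZ);
        if (dst < 0 || pwrite(dst, buf, BLKSZ, 0) != BLKSZ)
                return 1;

        close(dst);
        close(src);
        free(buf);
        return 0;
}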