Merge branch 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Author:     Linus Torvalds <torvalds@linux-foundation.org>
AuthorDate: Mon, 10 Oct 2016 20:38:49 +0000 (13:38 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Mon, 10 Oct 2016 20:38:49 +0000 (13:38 -0700)
Pull splice fixups from Al Viro:
 "A couple of fixups for interaction of pipe-backed iov_iter with
  O_DIRECT reads + constification of a couple of primitives in uio.h
  missed by previous rounds.

  Kudos to davej - his fuzzing has caught those bugs"

* 'work.splice_read' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  [btrfs] fix check_direct_IO() for non-iovec iterators
  constify iov_iter_count() and iter_is_iovec()
  fix ITER_PIPE interaction with direct_IO

fs/btrfs/inode.c
fs/xfs/xfs_file.c
include/linux/uio.h
mm/filemap.c
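
For illustration, a minimal sketch of the pattern the btrfs check_direct_IO()
change below adopts: skip the iovec-specific validation whenever the iterator
is not a plain ITER_IOVEC (for example the pipe-backed ITER_PIPE that splice
reads can now feed to ->direct_IO). The function name is hypothetical; only
the iov_iter helpers are real kernel API:

    #include <linux/uio.h>

    /* Hypothetical checker, not part of this merge. */
    static ssize_t example_check_direct_IO(struct iov_iter *iter)
    {
            /*
             * Writes and non-iovec iterators (ITER_PIPE, ITER_BVEC,
             * ITER_KVEC) carry no userspace iovec array, so there is
             * nothing iovec-specific to validate.
             */
            if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
                    return 0;

            /* iovec-only checks (e.g. duplicate iov_base detection) go here. */
            return 0;
    }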

diff --combined fs/btrfs/inode.c
@@@ -5072,7 -5072,7 +5072,7 @@@ static int btrfs_setattr(struct dentry 
        if (btrfs_root_readonly(root))
                return -EROFS;
  
 -      err = inode_change_ok(inode, attr);
 +      err = setattr_prepare(dentry, attr);
        if (err)
                return err;
  
@@@ -8412,7 -8412,7 +8412,7 @@@ static int btrfs_submit_direct_hook(str
        if (!bio)
                return -ENOMEM;
  
 -      bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
 +      bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio));
        bio->bi_private = dip;
        bio->bi_end_io = btrfs_end_dio_bio;
        btrfs_io_bio(bio)->logical = file_offset;
@@@ -8450,8 -8450,7 +8450,8 @@@ next_block
                                                  start_sector, GFP_NOFS);
                        if (!bio)
                                goto out_err;
 -                      bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
 +                      bio_set_op_attrs(bio, bio_op(orig_bio),
 +                                       bio_flags(orig_bio));
                        bio->bi_private = dip;
                        bio->bi_end_io = btrfs_end_dio_bio;
                        btrfs_io_bio(bio)->logical = file_offset;
@@@ -8619,7 -8618,7 +8619,7 @@@ static ssize_t check_direct_IO(struct b
                goto out;
  
        /* If this is a write we don't need to check anymore */
-       if (iov_iter_rw(iter) == WRITE)
+       if (iov_iter_rw(iter) != READ || !iter_is_iovec(iter))
                return 0;
        /*
         * Check to make sure we don't have duplicate iov_base's in this
@@@ -10544,6 -10543,21 +10544,6 @@@ out_inode
  
  }
  
 -/* Inspired by filemap_check_errors() */
 -int btrfs_inode_check_errors(struct inode *inode)
 -{
 -      int ret = 0;
 -
 -      if (test_bit(AS_ENOSPC, &inode->i_mapping->flags) &&
 -          test_and_clear_bit(AS_ENOSPC, &inode->i_mapping->flags))
 -              ret = -ENOSPC;
 -      if (test_bit(AS_EIO, &inode->i_mapping->flags) &&
 -          test_and_clear_bit(AS_EIO, &inode->i_mapping->flags))
 -              ret = -EIO;
 -
 -      return ret;
 -}
 -
  static const struct inode_operations btrfs_dir_inode_operations = {
        .getattr        = btrfs_getattr,
        .lookup         = btrfs_lookup,
diff --combined fs/xfs/xfs_file.c
@@@ -269,8 -269,6 +269,8 @@@ xfs_file_dio_aio_read
                return -EINVAL;
        }
  
 +      file_accessed(iocb->ki_filp);
 +
        /*
         * Locking is a bit tricky here. If we take an exclusive lock for direct
         * IO, we effectively serialise all new concurrent read IO to this file
        data = *to;
        ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
                        xfs_get_blocks_direct, NULL, NULL, 0);
-       if (ret > 0) {
+       if (ret >= 0) {
                iocb->ki_pos += ret;
                iov_iter_advance(to, ret);
        }
        xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
  
 -      file_accessed(iocb->ki_filp);
        return ret;
  }
  
@@@ -333,7 -332,10 +333,7 @@@ xfs_file_dax_read
        struct kiocb            *iocb,
        struct iov_iter         *to)
  {
 -      struct address_space    *mapping = iocb->ki_filp->f_mapping;
 -      struct inode            *inode = mapping->host;
 -      struct xfs_inode        *ip = XFS_I(inode);
 -      struct iov_iter         data = *to;
 +      struct xfs_inode        *ip = XFS_I(iocb->ki_filp->f_mapping->host);
        size_t                  count = iov_iter_count(to);
        ssize_t                 ret = 0;
  
                return 0; /* skip atime */
  
        xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 -      ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
 -      if (ret > 0) {
 -              iocb->ki_pos += ret;
 -              iov_iter_advance(to, ret);
 -      }
 +      ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
        xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
  
        file_accessed(iocb->ki_filp);
@@@ -666,32 -672,70 +666,32 @@@ xfs_file_dax_write
        struct kiocb            *iocb,
        struct iov_iter         *from)
  {
 -      struct address_space    *mapping = iocb->ki_filp->f_mapping;
 -      struct inode            *inode = mapping->host;
 +      struct inode            *inode = iocb->ki_filp->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
 -      struct xfs_mount        *mp = ip->i_mount;
 -      ssize_t                 ret = 0;
 -      int                     unaligned_io = 0;
 -      int                     iolock;
 -      struct iov_iter         data;
 +      int                     iolock = XFS_IOLOCK_EXCL;
 +      ssize_t                 ret, error = 0;
 +      size_t                  count;
 +      loff_t                  pos;
  
 -      /* "unaligned" here means not aligned to a filesystem block */
 -      if ((iocb->ki_pos & mp->m_blockmask) ||
 -          ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
 -              unaligned_io = 1;
 -              iolock = XFS_IOLOCK_EXCL;
 -      } else if (mapping->nrpages) {
 -              iolock = XFS_IOLOCK_EXCL;
 -      } else {
 -              iolock = XFS_IOLOCK_SHARED;
 -      }
        xfs_rw_ilock(ip, iolock);
 -
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
  
 -      /*
 -       * Yes, even DAX files can have page cache attached to them:  A zeroed
 -       * page is inserted into the pagecache when we have to serve a write
 -       * fault on a hole.  It should never be dirtied and can simply be
 -       * dropped from the pagecache once we get real data for the page.
 -       *
 -       * XXX: This is racy against mmap, and there's nothing we can do about
 -       * it. dax_do_io() should really do this invalidation internally as
 -       * it will know if we've allocated over a hole for this specific IO and
 -       * if so it needs to update the mapping tree and invalidate existing
 -       * PTEs over the newly allocated range. Remove this invalidation when
 -       * dax_do_io() is fixed up.
 -       */
 -      if (mapping->nrpages) {
 -              loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
 +      pos = iocb->ki_pos;
 +      count = iov_iter_count(from);
  
 -              ret = invalidate_inode_pages2_range(mapping,
 -                                                  iocb->ki_pos >> PAGE_SHIFT,
 -                                                  end >> PAGE_SHIFT);
 -              WARN_ON_ONCE(ret);
 -      }
 +      trace_xfs_file_dax_write(ip, count, pos);
  
 -      if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
 -              xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 -              iolock = XFS_IOLOCK_SHARED;
 +      ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
 +      if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 +              i_size_write(inode, iocb->ki_pos);
 +              error = xfs_setfilesize(ip, pos, ret);
        }
  
 -      trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 -
 -      data = *from;
 -      ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
 -                      xfs_end_io_direct_write, 0);
 -      if (ret > 0) {
 -              iocb->ki_pos += ret;
 -              iov_iter_advance(from, ret);
 -      }
  out:
        xfs_rw_iunlock(ip, iolock);
 -      return ret;
 +      return error ? error : ret;
  }
  
  STATIC ssize_t
@@@ -901,7 -945,7 +901,7 @@@ xfs_file_fallocate
  
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = new_size;
 -              error = xfs_setattr_size(ip, &iattr);
 +              error = xfs_vn_setattr_size(file_dentry(file), &iattr);
                if (error)
                        goto out_unlock;
        }
@@@ -1430,7 -1474,7 +1430,7 @@@ xfs_filemap_page_mkwrite
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
  
        if (IS_DAX(inode)) {
 -              ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 +              ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
        } else {
                ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
                ret = block_page_mkwrite_return(ret);
@@@ -1464,7 -1508,7 +1464,7 @@@ xfs_filemap_fault
                 * changes to xfs_get_blocks_direct() to map unwritten extent
                 * ioend for conversion on read-only mappings.
                 */
 -              ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
 +              ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
        } else
                ret = filemap_fault(vma, vmf);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@@ -1579,7 -1623,6 +1579,7 @@@ const struct file_operations xfs_file_o
        .open           = xfs_file_open,
        .release        = xfs_file_release,
        .fsync          = xfs_file_fsync,
 +      .get_unmapped_area = thp_get_unmapped_area,
        .fallocate      = xfs_file_fallocate,
  };
  
diff --combined include/linux/uio.h
@@@ -82,6 -82,7 +82,6 @@@ size_t iov_iter_copy_from_user_atomic(s
                struct iov_iter *i, unsigned long offset, size_t bytes);
  void iov_iter_advance(struct iov_iter *i, size_t bytes);
  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 -#define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable
  size_t iov_iter_single_seg_count(const struct iov_iter *i);
  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i);
@@@ -109,12 -110,12 +109,12 @@@ int iov_iter_npages(const struct iov_it
  
  const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
  
- static inline size_t iov_iter_count(struct iov_iter *i)
+ static inline size_t iov_iter_count(const struct iov_iter *i)
  {
        return i->count;
  }
  
- static inline bool iter_is_iovec(struct iov_iter *i)
+ static inline bool iter_is_iovec(const struct iov_iter *i)
  {
        return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
  }
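
A small usage sketch of what the constification above enables: code that only
inspects an iterator can take it by const pointer and still call these
primitives. The helper name here is made up:

    #include <linux/types.h>
    #include <linux/uio.h>

    /* Hypothetical read-only predicate; valid once iov_iter_count() and
     * iter_is_iovec() accept a const struct iov_iter *. */
    static bool example_needs_iovec_checks(const struct iov_iter *iter)
    {
            return iter_is_iovec(iter) && iov_iter_count(iter) != 0;
    }
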
diff --combined mm/filemap.c
   *   ->tasklist_lock            (memory_failure, collect_procs_ao)
   */
  
 +static int page_cache_tree_insert(struct address_space *mapping,
 +                                struct page *page, void **shadowp)
 +{
 +      struct radix_tree_node *node;
 +      void **slot;
 +      int error;
 +
 +      error = __radix_tree_create(&mapping->page_tree, page->index, 0,
 +                                  &node, &slot);
 +      if (error)
 +              return error;
 +      if (*slot) {
 +              void *p;
 +
 +              p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 +              if (!radix_tree_exceptional_entry(p))
 +                      return -EEXIST;
 +
 +              mapping->nrexceptional--;
 +              if (!dax_mapping(mapping)) {
 +                      if (shadowp)
 +                              *shadowp = p;
 +                      if (node)
 +                              workingset_node_shadows_dec(node);
 +              } else {
 +                      /* DAX can replace empty locked entry with a hole */
 +                      WARN_ON_ONCE(p !=
 +                              (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
 +                                       RADIX_DAX_ENTRY_LOCK));
 +                      /* DAX accounts exceptional entries as normal pages */
 +                      if (node)
 +                              workingset_node_pages_dec(node);
 +                      /* Wakeup waiters for exceptional entry lock */
 +                      dax_wake_mapping_entry_waiter(mapping, page->index,
 +                                                    false);
 +              }
 +      }
 +      radix_tree_replace_slot(slot, page);
 +      mapping->nrpages++;
 +      if (node) {
 +              workingset_node_pages_inc(node);
 +              /*
 +               * Don't track node that contains actual pages.
 +               *
 +               * Avoid acquiring the list_lru lock if already
 +               * untracked.  The list_empty() test is safe as
 +               * node->private_list is protected by
 +               * mapping->tree_lock.
 +               */
 +              if (!list_empty(&node->private_list))
 +                      list_lru_del(&workingset_shadow_nodes,
 +                                   &node->private_list);
 +      }
 +      return 0;
 +}
 +
  static void page_cache_tree_delete(struct address_space *mapping,
                                   struct page *page, void *shadow)
  {
 -      struct radix_tree_node *node;
        int i, nr = PageHuge(page) ? 1 : hpage_nr_pages(page);
  
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageTail(page), page);
        VM_BUG_ON_PAGE(nr != 1 && shadow, page);
  
 -      if (shadow) {
 -              mapping->nrexceptional += nr;
 -              /*
 -               * Make sure the nrexceptional update is committed before
 -               * the nrpages update so that final truncate racing
 -               * with reclaim does not see both counters 0 at the
 -               * same time and miss a shadow entry.
 -               */
 -              smp_wmb();
 -      }
 -      mapping->nrpages -= nr;
 -
        for (i = 0; i < nr; i++) {
 -              node = radix_tree_replace_clear_tags(&mapping->page_tree,
 -                              page->index + i, shadow);
 +              struct radix_tree_node *node;
 +              void **slot;
 +
 +              __radix_tree_lookup(&mapping->page_tree, page->index + i,
 +                                  &node, &slot);
 +
 +              radix_tree_clear_tags(&mapping->page_tree, node, slot);
 +
                if (!node) {
                        VM_BUG_ON_PAGE(nr != 1, page);
 -                      return;
 +                      /*
 +                       * We need a node to properly account shadow
 +                       * entries. Don't plant any without. XXX
 +                       */
 +                      shadow = NULL;
                }
  
 +              radix_tree_replace_slot(slot, shadow);
 +
 +              if (!node)
 +                      break;
 +
                workingset_node_pages_dec(node);
                if (shadow)
                        workingset_node_shadows_inc(node);
                                        &node->private_list);
                }
        }
 +
 +      if (shadow) {
 +              mapping->nrexceptional += nr;
 +              /*
 +               * Make sure the nrexceptional update is committed before
 +               * the nrpages update so that final truncate racing
 +               * with reclaim does not see both counters 0 at the
 +               * same time and miss a shadow entry.
 +               */
 +              smp_wmb();
 +      }
 +      mapping->nrpages -= nr;
  }
  
  /*
@@@ -631,8 -561,9 +631,8 @@@ int replace_page_cache_page(struct pag
  
                spin_lock_irqsave(&mapping->tree_lock, flags);
                __delete_from_page_cache(old, NULL);
 -              error = radix_tree_insert(&mapping->page_tree, offset, new);
 +              error = page_cache_tree_insert(mapping, new, NULL);
                BUG_ON(error);
 -              mapping->nrpages++;
  
                /*
                 * hugetlb pages do not participate in page cache accounting.
  }
  EXPORT_SYMBOL_GPL(replace_page_cache_page);
  
 -static int page_cache_tree_insert(struct address_space *mapping,
 -                                struct page *page, void **shadowp)
 -{
 -      struct radix_tree_node *node;
 -      void **slot;
 -      int error;
 -
 -      error = __radix_tree_create(&mapping->page_tree, page->index, 0,
 -                                  &node, &slot);
 -      if (error)
 -              return error;
 -      if (*slot) {
 -              void *p;
 -
 -              p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
 -              if (!radix_tree_exceptional_entry(p))
 -                      return -EEXIST;
 -
 -              mapping->nrexceptional--;
 -              if (!dax_mapping(mapping)) {
 -                      if (shadowp)
 -                              *shadowp = p;
 -                      if (node)
 -                              workingset_node_shadows_dec(node);
 -              } else {
 -                      /* DAX can replace empty locked entry with a hole */
 -                      WARN_ON_ONCE(p !=
 -                              (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY |
 -                                       RADIX_DAX_ENTRY_LOCK));
 -                      /* DAX accounts exceptional entries as normal pages */
 -                      if (node)
 -                              workingset_node_pages_dec(node);
 -                      /* Wakeup waiters for exceptional entry lock */
 -                      dax_wake_mapping_entry_waiter(mapping, page->index,
 -                                                    false);
 -              }
 -      }
 -      radix_tree_replace_slot(slot, page);
 -      mapping->nrpages++;
 -      if (node) {
 -              workingset_node_pages_inc(node);
 -              /*
 -               * Don't track node that contains actual pages.
 -               *
 -               * Avoid acquiring the list_lru lock if already
 -               * untracked.  The list_empty() test is safe as
 -               * node->private_list is protected by
 -               * mapping->tree_lock.
 -               */
 -              if (!list_empty(&node->private_list))
 -                      list_lru_del(&workingset_shadow_nodes,
 -                                   &node->private_list);
 -      }
 -      return 0;
 -}
 -
  static int __add_to_page_cache_locked(struct page *page,
                                      struct address_space *mapping,
                                      pgoff_t offset, gfp_t gfp_mask,
@@@ -1687,10 -1674,6 +1687,10 @@@ static ssize_t do_generic_file_read(str
        unsigned int prev_offset;
        int error = 0;
  
 +      if (unlikely(*ppos >= inode->i_sb->s_maxbytes))
 +              return -EINVAL;
 +      iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
 +
        index = *ppos >> PAGE_SHIFT;
        prev_index = ra->prev_pos >> PAGE_SHIFT;
        prev_offset = ra->prev_pos & (PAGE_SIZE-1);
@@@ -1725,9 -1708,7 +1725,9 @@@ find_page
                         * wait_on_page_locked is used to avoid unnecessarily
                         * serialisations and why it's safe.
                         */
 -                      wait_on_page_locked_killable(page);
 +                      error = wait_on_page_locked_killable(page);
 +                      if (unlikely(error))
 +                              goto readpage_error;
                        if (PageUptodate(page))
                                goto page_ok;
  
@@@ -1929,19 -1910,17 +1929,19 @@@ generic_file_read_iter(struct kiocb *io
        if (iocb->ki_flags & IOCB_DIRECT) {
                struct address_space *mapping = file->f_mapping;
                struct inode *inode = mapping->host;
 +              struct iov_iter data = *iter;
                loff_t size;
  
                size = i_size_read(inode);
                retval = filemap_write_and_wait_range(mapping, iocb->ki_pos,
                                        iocb->ki_pos + count - 1);
 -              if (!retval) {
 -                      struct iov_iter data = *iter;
 -                      retval = mapping->a_ops->direct_IO(iocb, &data);
 -              }
 +              if (retval < 0)
 +                      goto out;
  
-               if (retval > 0) {
 +              file_accessed(file);
 +
 +              retval = mapping->a_ops->direct_IO(iocb, &data);
+               if (retval >= 0) {
                        iocb->ki_pos += retval;
                        iov_iter_advance(iter, retval);
                }
                 * DAX files, so don't bother trying.
                 */
                if (retval < 0 || !iov_iter_count(iter) || iocb->ki_pos >= size ||
 -                  IS_DAX(inode)) {
 -                      file_accessed(file);
 +                  IS_DAX(inode))
                        goto out;
 -              }
        }
  
        retval = do_generic_file_read(file, &iocb->ki_pos, iter, retval);