Merge tag 'xfs-for-linus-4.7-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
diff --combined fs/namei.c

index 5375571,cdd0419..15b124c
--- 1/fs/namei.c
--- 2/fs/namei.c
+++ b/fs/namei.c
@@@ -265,7 -265,7 +265,7 @@@ static int check_acl(struct inode *inod
                 if (!acl)
                         return -EAGAIN;
                 /* no ->get_acl() calls in RCU mode... */
- -              if (acl == ACL_NOT_CACHED)
+ +              if (is_uncached_acl(acl))
                         return -ECHILD;
                 return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK);
         }
@@@ -1603,42 -1603,32 +1603,42 @@@ static struct dentry *lookup_slow(cons
                                   struct dentry *dir,
                                   unsigned int flags)
   {
- -      struct dentry *dentry;
- -      inode_lock(dir->d_inode);
- -      dentry = d_lookup(dir, name);
- -      if (unlikely(dentry)) {
+ +      struct dentry *dentry = ERR_PTR(-ENOENT), *old;
+ +      struct inode *inode = dir->d_inode;
+ +      DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ +
+ +      inode_lock_shared(inode);
+ +      /* Don't go there if it's already dead */
+ +      if (unlikely(IS_DEADDIR(inode)))
+ +              goto out;
+ +again:
+ +      dentry = d_alloc_parallel(dir, name, &wq);
+ +      if (IS_ERR(dentry))
+ +              goto out;
+ +      if (unlikely(!d_in_lookup(dentry))) {
                 if ((dentry->d_flags & DCACHE_OP_REVALIDATE) &&
                     !(flags & LOOKUP_NO_REVAL)) {
                         int error = d_revalidate(dentry, flags);
                         if (unlikely(error <= 0)) {
- -                              if (!error)
+ +                              if (!error) {
                                         d_invalidate(dentry);
+ +                                      dput(dentry);
+ +                                      goto again;
+ +                              }
                                 dput(dentry);
                                 dentry = ERR_PTR(error);
                         }
                 }
- -              if (dentry) {
- -                      inode_unlock(dir->d_inode);
- -                      return dentry;
+ +      } else {
+ +              old = inode->i_op->lookup(inode, dentry, flags);
+ +              d_lookup_done(dentry);
+ +              if (unlikely(old)) {
+ +                      dput(dentry);
+ +                      dentry = old;
                 }
         }
- -      dentry = d_alloc(dir, name);
- -      if (unlikely(!dentry)) {
- -              inode_unlock(dir->d_inode);
- -              return ERR_PTR(-ENOMEM);
- -      }
- -      dentry = lookup_real(dir->d_inode, dentry, flags);
- -      inode_unlock(dir->d_inode);
+ +out:
+ +      inode_unlock_shared(inode);
         return dentry;
   }
   
@@@ -1750,17 -1740,15 +1750,17 @@@ static int walk_component(struct nameid
                                           nd->flags);
                 if (IS_ERR(path.dentry))
                         return PTR_ERR(path.dentry);
- -              if (unlikely(d_is_negative(path.dentry))) {
- -                      dput(path.dentry);
- -                      return -ENOENT;
- -              }
+ +
                 path.mnt = nd->path.mnt;
                 err = follow_managed(&path, nd);
                 if (unlikely(err < 0))
                         return err;
   
+ +              if (unlikely(d_is_negative(path.dentry))) {
+ +                      path_to_nameidata(&path, nd);
+ +                      return -ENOENT;
+ +              }
+ +
                 seq = 0;        /* we are already out of RCU mode */
                 inode = d_backing_inode(path.dentry);
         }
@@@ -1804,49 -1792,30 +1804,49 @@@ static inline unsigned int fold_hash(un
         return hash_64(hash, 32);
   }
   
+ +/*
+ + * This is George Marsaglia's XORSHIFT generator.
+ + * It implements a maximum-period LFSR in only a few
+ + * instructions.  It also has the property (required
+ + * by hash_name()) that mix_hash(0) = 0.
+ + */
+ +static inline unsigned long mix_hash(unsigned long hash)
+ +{
+ +      hash ^= hash << 13;
+ +      hash ^= hash >> 7;
+ +      hash ^= hash << 17;
+ +      return hash;
+ +}
+ +
   #else /* 32-bit case */
   
   #define fold_hash(x) (x)
   
+ +static inline unsigned long mix_hash(unsigned long hash)
+ +{
+ +      hash ^= hash << 13;
+ +      hash ^= hash >> 17;
+ +      hash ^= hash << 5;
+ +      return hash;
+ +}
+ +
   #endif
   
   unsigned int full_name_hash(const unsigned char *name, unsigned int len)
   {
- -      unsigned long a, mask;
- -      unsigned long hash = 0;
+ +      unsigned long a, hash = 0;
   
         for (;;) {
                 a = load_unaligned_zeropad(name);
                 if (len < sizeof(unsigned long))
                         break;
- -              hash += a;
- -              hash *= 9;
+ +              hash = mix_hash(hash + a);
                 name += sizeof(unsigned long);
                 len -= sizeof(unsigned long);
                 if (!len)
                         goto done;
         }
- -      mask = bytemask_from_count(len);
- -      hash += mask & a;
+ +      hash += a & bytemask_from_count(len);
   done:
         return fold_hash(hash);
   }
@@@ -1864,7 -1833,7 +1864,7 @@@ static inline u64 hash_name(const char 
         hash = a = 0;
         len = -sizeof(unsigned long);
         do {
- -              hash = (hash + a) * 9;
+ +              hash = mix_hash(hash + a);
                 len += sizeof(unsigned long);
                 a = load_unaligned_zeropad(name+len);
                 b = a ^ REPEAT_BYTE('/');
@@@ -2295,33 -2264,6 +2295,33 @@@ int vfs_path_lookup(struct dentry *dent
   }
   EXPORT_SYMBOL(vfs_path_lookup);
   
+ +/**
+ + * lookup_hash - lookup single pathname component on already hashed name
+ + * @name:     name and hash to lookup
+ + * @base:     base directory to lookup from
+ + *
+ + * The name must have been verified and hashed (see lookup_one_len()).  Using
+ + * this after just full_name_hash() is unsafe.
+ + *
+ + * This function also doesn't check for search permission on base directory.
+ + *
+ + * Use lookup_one_len_unlocked() instead, unless you really know what you are
+ + * doing.
+ + *
+ + * Do not hold i_mutex; this helper takes i_mutex if necessary.
+ + */
+ +struct dentry *lookup_hash(const struct qstr *name, struct dentry *base)
+ +{
+ +      struct dentry *ret;
+ +
+ +      ret = lookup_dcache(name, base, 0);
+ +      if (!ret)
+ +              ret = lookup_slow(name, base, 0);
+ +
+ +      return ret;
+ +}
+ +EXPORT_SYMBOL(lookup_hash);
+ +
   /**
    * lookup_one_len - filesystem helper to lookup single pathname component
    * @name:     pathname component to lookup
@@@ -2393,6 -2335,7 +2393,6 @@@ struct dentry *lookup_one_len_unlocked(
         struct qstr this;
         unsigned int c;
         int err;
- -      struct dentry *ret;
   
         this.name = name;
         this.len = len;
@@@ -2424,7 -2367,10 +2424,7 @@@
         if (err)
                 return ERR_PTR(err);
   
- -      ret = lookup_dcache(&this, base, 0);
- -      if (!ret)
- -              ret = lookup_slow(&this, base, 0);
- -      return ret;
+ +      return lookup_hash(&this, base);
   }
   EXPORT_SYMBOL(lookup_one_len_unlocked);
   
@@@ -2707,7 -2653,7 +2707,7 @@@ struct dentry *lock_rename(struct dentr
                 return NULL;
         }
   
- -      mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
+ +      mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
   
         p = d_ancestor(p2, p1);
         if (p) {
@@@ -2734,7 -2680,7 +2734,7 @@@ void unlock_rename(struct dentry *p1, s
         inode_unlock(p1->d_inode);
         if (p1 != p2) {
                 inode_unlock(p2->d_inode);
- -              mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
+ +              mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
         }
   }
   EXPORT_SYMBOL(unlock_rename);
@@@ -2837,7 -2783,7 +2837,7 @@@ static inline int open_to_namei_flags(i
         return flag;
   }
   
- -static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
+ +static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t mode)
   {
         int error = security_path_mknod(dir, dentry, mode, 0);
         if (error)
@@@ -2866,56 -2812,155 +2866,56 @@@
   static int atomic_open(struct nameidata *nd, struct dentry *dentry,
                         struct path *path, struct file *file,
                         const struct open_flags *op,
- -                      bool got_write, bool need_lookup,
+ +                      int open_flag, umode_t mode,
                         int *opened)
   {
+ +      struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
         struct inode *dir =  nd->path.dentry->d_inode;
- -      unsigned open_flag = open_to_namei_flags(op->open_flag);
- -      umode_t mode;
         int error;
- -      int acc_mode;
- -      int create_error = 0;
- -      struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
- -      bool excl;
- -
- -      BUG_ON(dentry->d_inode);
- -
- -      /* Don't create child dentry for a dead directory. */
- -      if (unlikely(IS_DEADDIR(dir))) {
- -              error = -ENOENT;
- -              goto out;
- -      }
- -
- -      mode = op->mode;
- -      if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
- -              mode &= ~current_umask();
   
- -      excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT);
- -      if (excl)
+ +      if (!(~open_flag & (O_EXCL | O_CREAT))) /* both O_EXCL and O_CREAT */
                 open_flag &= ~O_TRUNC;
   
- -      /*
- -       * Checking write permission is tricky, bacuse we don't know if we are
- -       * going to actually need it: O_CREAT opens should work as long as the
- -       * file exists.  But checking existence breaks atomicity.  The trick is
- -       * to check access and if not granted clear O_CREAT from the flags.
- -       *
- -       * Another problem is returing the "right" error value (e.g. for an
- -       * O_EXCL open we want to return EEXIST not EROFS).
- -       */
- -      if (((open_flag & (O_CREAT | O_TRUNC)) ||
- -          (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) {
- -              if (!(open_flag & O_CREAT)) {
- -                      /*
- -                       * No O_CREATE -> atomicity not a requirement -> fall
- -                       * back to lookup + open
- -                       */
- -                      goto no_open;
- -              } else if (open_flag & (O_EXCL | O_TRUNC)) {
- -                      /* Fall back and fail with the right error */
- -                      create_error = -EROFS;
- -                      goto no_open;
- -              } else {
- -                      /* No side effects, safe to clear O_CREAT */
- -                      create_error = -EROFS;
- -                      open_flag &= ~O_CREAT;
- -              }
- -      }
- -
- -      if (open_flag & O_CREAT) {
- -              error = may_o_create(&nd->path, dentry, mode);
- -              if (error) {
- -                      create_error = error;
- -                      if (open_flag & O_EXCL)
- -                              goto no_open;
- -                      open_flag &= ~O_CREAT;
- -              }
- -      }
- -
         if (nd->flags & LOOKUP_DIRECTORY)
                 open_flag |= O_DIRECTORY;
   
         file->f_path.dentry = DENTRY_NOT_SET;
         file->f_path.mnt = nd->path.mnt;
- -      error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode,
- -                                    opened);
- -      if (error < 0) {
- -              if (create_error && error == -ENOENT)
- -                      error = create_error;
- -              goto out;
- -      }
- -
- -      if (error) {    /* returned 1, that is */
+ +      error = dir->i_op->atomic_open(dir, dentry, file,
+ +                                     open_to_namei_flags(open_flag),
+ +                                     mode, opened);
+ +      d_lookup_done(dentry);
+ +      if (!error) {
+ +              /*
+ +               * We didn't have the inode before the open, so check open
+ +               * permission here.
+ +               */
+ +              int acc_mode = op->acc_mode;
+ +              if (*opened & FILE_CREATED) {
+ +                      WARN_ON(!(open_flag & O_CREAT));
+ +                      fsnotify_create(dir, dentry);
+ +                      acc_mode = 0;
+ +              }
+ +              error = may_open(&file->f_path, acc_mode, open_flag);
+ +              if (WARN_ON(error > 0))
+ +                      error = -EINVAL;
+ +      } else if (error > 0) {
                 if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) {
                         error = -EIO;
- -                      goto out;
- -              }
- -              if (file->f_path.dentry) {
- -                      dput(dentry);
- -                      dentry = file->f_path.dentry;
- -              }
- -              if (*opened & FILE_CREATED)
- -                      fsnotify_create(dir, dentry);
- -              if (!dentry->d_inode) {
- -                      WARN_ON(*opened & FILE_CREATED);
- -                      if (create_error) {
- -                              error = create_error;
- -                              goto out;
- -                      }
                 } else {
- -                      if (excl && !(*opened & FILE_CREATED)) {
- -                              error = -EEXIST;
- -                              goto out;
+ +                      if (file->f_path.dentry) {
+ +                              dput(dentry);
+ +                              dentry = file->f_path.dentry;
                         }
+ +                      if (*opened & FILE_CREATED)
+ +                              fsnotify_create(dir, dentry);
+ +                      path->dentry = dentry;
+ +                      path->mnt = nd->path.mnt;
+ +                      return 1;
                 }
- -              goto looked_up;
- -      }
- -
- -      /*
- -       * We didn't have the inode before the open, so check open permission
- -       * here.
- -       */
- -      acc_mode = op->acc_mode;
- -      if (*opened & FILE_CREATED) {
- -              WARN_ON(!(open_flag & O_CREAT));
- -              fsnotify_create(dir, dentry);
- -              acc_mode = 0;
         }
- -      error = may_open(&file->f_path, acc_mode, open_flag);
- -      if (error)
- -              fput(file);
- -
- -out:
         dput(dentry);
         return error;
- -
- -no_open:
- -      if (need_lookup) {
- -              dentry = lookup_real(dir, dentry, nd->flags);
- -              if (IS_ERR(dentry))
- -                      return PTR_ERR(dentry);
- -
- -              if (create_error) {
- -                      int open_flag = op->open_flag;
- -
- -                      error = create_error;
- -                      if ((open_flag & O_EXCL)) {
- -                              if (!dentry->d_inode)
- -                                      goto out;
- -                      } else if (!dentry->d_inode) {
- -                              goto out;
- -                      } else if ((open_flag & O_TRUNC) &&
- -                                 d_is_reg(dentry)) {
- -                              goto out;
- -                      }
- -                      /* will fail later, go on to get the right error */
- -              }
- -      }
- -looked_up:
- -      path->dentry = dentry;
- -      path->mnt = nd->path.mnt;
- -      return 1;
   }
   
   /*
@@@ -2943,118 -2988,62 +2943,118 @@@ static int lookup_open(struct nameidat
   {
         struct dentry *dir = nd->path.dentry;
         struct inode *dir_inode = dir->d_inode;
+ +      int open_flag = op->open_flag;
         struct dentry *dentry;
- -      int error;
- -      bool need_lookup = false;
+ +      int error, create_error = 0;
+ +      umode_t mode = op->mode;
+ +      DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ +
+ +      if (unlikely(IS_DEADDIR(dir_inode)))
+ +              return -ENOENT;
   
         *opened &= ~FILE_CREATED;
- -      dentry = lookup_dcache(&nd->last, dir, nd->flags);
- -      if (IS_ERR(dentry))
- -              return PTR_ERR(dentry);
+ +      dentry = d_lookup(dir, &nd->last);
+ +      for (;;) {
+ +              if (!dentry) {
+ +                      dentry = d_alloc_parallel(dir, &nd->last, &wq);
+ +                      if (IS_ERR(dentry))
+ +                              return PTR_ERR(dentry);
+ +              }
+ +              if (d_in_lookup(dentry))
+ +                      break;
   
- -      if (!dentry) {
- -              dentry = d_alloc(dir, &nd->last);
- -              if (unlikely(!dentry))
- -                      return -ENOMEM;
- -              need_lookup = true;
- -      } else if (dentry->d_inode) {
+ +              if (!(dentry->d_flags & DCACHE_OP_REVALIDATE))
+ +                      break;
+ +
+ +              error = d_revalidate(dentry, nd->flags);
+ +              if (likely(error > 0))
+ +                      break;
+ +              if (error)
+ +                      goto out_dput;
+ +              d_invalidate(dentry);
+ +              dput(dentry);
+ +              dentry = NULL;
+ +      }
+ +      if (dentry->d_inode) {
                 /* Cached positive dentry: will open in f_op->open */
                 goto out_no_open;
         }
   
- -      if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
- -              return atomic_open(nd, dentry, path, file, op, got_write,
- -                                 need_lookup, opened);
+ +      /*
+ +       * Checking write permission is tricky, because we don't know if we are
+ +       * going to actually need it: O_CREAT opens should work as long as the
+ +       * file exists.  But checking existence breaks atomicity.  The trick is
+ +       * to check access and if not granted clear O_CREAT from the flags.
+ +       *
+ +       * Another problem is returning the "right" error value (e.g. for an
+ +       * O_EXCL open we want to return EEXIST not EROFS).
+ +       */
+ +      if (open_flag & O_CREAT) {
+ +              if (!IS_POSIXACL(dir->d_inode))
+ +                      mode &= ~current_umask();
+ +              if (unlikely(!got_write)) {
+ +                      create_error = -EROFS;
+ +                      open_flag &= ~O_CREAT;
+ +                      if (open_flag & (O_EXCL | O_TRUNC))
+ +                              goto no_open;
+ +                      /* No side effects, safe to clear O_CREAT */
+ +              } else {
+ +                      create_error = may_o_create(&nd->path, dentry, mode);
+ +                      if (create_error) {
+ +                              open_flag &= ~O_CREAT;
+ +                              if (open_flag & O_EXCL)
+ +                                      goto no_open;
+ +                      }
+ +              }
+ +      } else if ((open_flag & (O_TRUNC|O_WRONLY|O_RDWR)) &&
+ +                 unlikely(!got_write)) {
+ +              /*
+ +               * No O_CREATE -> atomicity not a requirement -> fall
+ +               * back to lookup + open
+ +               */
+ +              goto no_open;
         }
   
- -      if (need_lookup) {
- -              BUG_ON(dentry->d_inode);
+ +      if (dir_inode->i_op->atomic_open) {
+ +              error = atomic_open(nd, dentry, path, file, op, open_flag,
+ +                                  mode, opened);
+ +              if (unlikely(error == -ENOENT) && create_error)
+ +                      error = create_error;
+ +              return error;
+ +      }
   
- -              dentry = lookup_real(dir_inode, dentry, nd->flags);
- -              if (IS_ERR(dentry))
- -                      return PTR_ERR(dentry);
+ +no_open:
+ +      if (d_in_lookup(dentry)) {
+ +              struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry,
+ +                                                           nd->flags);
+ +              d_lookup_done(dentry);
+ +              if (unlikely(res)) {
+ +                      if (IS_ERR(res)) {
+ +                              error = PTR_ERR(res);
+ +                              goto out_dput;
+ +                      }
+ +                      dput(dentry);
+ +                      dentry = res;
+ +              }
         }
   
         /* Negative dentry, just create the file */
- -      if (!dentry->d_inode && (op->open_flag & O_CREAT)) {
- -              umode_t mode = op->mode;
- -              if (!IS_POSIXACL(dir->d_inode))
- -                      mode &= ~current_umask();
- -              /*
- -               * This write is needed to ensure that a
- -               * rw->ro transition does not occur between
- -               * the time when the file is created and when
- -               * a permanent write count is taken through
- -               * the 'struct file' in finish_open().
- -               */
- -              if (!got_write) {
- -                      error = -EROFS;
- -                      goto out_dput;
- -              }
+ +      if (!dentry->d_inode && (open_flag & O_CREAT)) {
                 *opened |= FILE_CREATED;
- -              error = security_path_mknod(&nd->path, dentry, mode, 0);
- -              if (error)
+ +              audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
+ +              if (!dir_inode->i_op->create) {
+ +                      error = -EACCES;
                         goto out_dput;
- -              error = vfs_create(dir->d_inode, dentry, mode,
- -                                 nd->flags & LOOKUP_EXCL);
+ +              }
+ +              error = dir_inode->i_op->create(dir_inode, dentry, mode,
+ +                                              open_flag & O_EXCL);
                 if (error)
                         goto out_dput;
+ +              fsnotify_create(dir_inode, dentry);
+ +      }
+ +      if (unlikely(create_error) && !dentry->d_inode) {
+ +              error = create_error;
+ +              goto out_dput;
         }
   out_no_open:
         path->dentry = dentry;
@@@ -3126,7 -3115,7 +3126,7 @@@ static int do_last(struct nameidata *nd
         }
   
   retry_lookup:
- -      if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
+ +      if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
                 error = mnt_want_write(nd->path.mnt);
                 if (!error)
                         got_write = true;
@@@ -3136,15 -3125,9 +3136,15 @@@
                  * dropping this one anyway.
                  */
         }
- -      inode_lock(dir->d_inode);
+ +      if (open_flag & O_CREAT)
+ +              inode_lock(dir->d_inode);
+ +      else
+ +              inode_lock_shared(dir->d_inode);
         error = lookup_open(nd, &path, file, op, got_write, opened);
- -      inode_unlock(dir->d_inode);
+ +      if (open_flag & O_CREAT)
+ +              inode_unlock(dir->d_inode);
+ +      else
+ +              inode_unlock_shared(dir->d_inode);
   
         if (error <= 0) {
                 if (error)
@@@ -3224,6 -3207,10 +3224,6 @@@ finish_open
                 return error;
         }
         audit_inode(nd->name, nd->path.dentry, 0);
- -      if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
- -              error = -ELOOP;
- -              goto out;
- -      }
         error = -EISDIR;
         if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
                 goto out;
@@@ -3240,9 -3227,11 +3240,9 @@@
                 got_write = true;
         }
   finish_open_created:
- -      if (likely(!(open_flag & O_PATH))) {
- -              error = may_open(&nd->path, acc_mode, open_flag);
- -              if (error)
- -                      goto out;
- -      }
+ +      error = may_open(&nd->path, acc_mode, open_flag);
+ +      if (error)
+ +              goto out;
         BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
         error = vfs_open(&nd->path, file, current_cred());
         if (!error) {
@@@ -3254,13 -3243,18 +3254,13 @@@
         }
   opened:
         error = open_check_o_direct(file);
- -      if (error)
- -              goto exit_fput;
- -      error = ima_file_check(file, op->acc_mode, *opened);
- -      if (error)
- -              goto exit_fput;
- -
- -      if (will_truncate) {
+ +      if (!error)
+ +              error = ima_file_check(file, op->acc_mode, *opened);
+ +      if (!error && will_truncate)
                 error = handle_truncate(file);
- -              if (error)
- -                      goto exit_fput;
- -      }
   out:
+ +      if (unlikely(error) && (*opened & FILE_OPENED))
+ +              fput(file);
         if (unlikely(error > 0)) {
                 WARN_ON(1);
                 error = -EINVAL;
@@@ -3270,6 -3264,10 +3270,6 @@@
         path_put(&save_parent);
         return error;
   
- -exit_fput:
- -      fput(file);
- -      goto out;
- -
   stale_open:
         /* If no saved parent or already retried then can't retry */
         if (!save_parent.dentry || retried)
@@@ -3347,18 -3345,6 +3347,18 @@@ out
         return error;
   }
   
+ +static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file)
+ +{
+ +      struct path path;
+ +      int error = path_lookupat(nd, flags, &path);
+ +      if (!error) {
+ +              audit_inode(nd->name, path.dentry, 0);
+ +              error = vfs_open(&path, file, current_cred());
+ +              path_put(&path);
+ +      }
+ +      return error;
+ +}
+ +
   static struct file *path_openat(struct nameidata *nd,
                         const struct open_flags *op, unsigned flags)
   {
@@@ -3378,13 -3364,6 +3378,13 @@@
                 goto out2;
         }
   
+ +      if (unlikely(file->f_flags & O_PATH)) {
+ +              error = do_o_path(nd, flags, file);
+ +              if (!error)
+ +                      opened |= FILE_OPENED;
+ +              goto out2;
+ +      }
+ +
         s = path_init(nd, flags);
         if (IS_ERR(s)) {
                 put_filp(file);
@@@ -3627,8 -3606,6 +3627,8 @@@ retry
         switch (mode & S_IFMT) {
                 case 0: case S_IFREG:
                         error = vfs_create(path.dentry->d_inode,dentry,mode,true);
+ +                      if (!error)
+ +                              ima_post_path_mknod(dentry);
                         break;
                 case S_IFCHR: case S_IFBLK:
                         error = vfs_mknod(path.dentry->d_inode,dentry,mode,
@@@ -4234,11 -4211,7 +4234,11 @@@ int vfs_rename(struct inode *old_dir, s
         bool new_is_dir = false;
         unsigned max_links = new_dir->i_sb->s_max_links;
   
- -      if (source == target)
+ +      /*
+ +       * Check source == target.
+ +       * On overlayfs need to look at underlying inodes.
+ +       */
+ +      if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0))
                 return 0;
   
         error = may_delete(old_dir, old_dentry, is_dir);
@@@ -4542,7 -4515,6 +4542,6 @@@ int readlink_copy(char __user *buffer, 
   out:
         return len;
   }
- EXPORT_SYMBOL(readlink_copy);
   
   /*
    * A helper for ->readlink().  This should be used *ONLY* for symlinks that
diff --combined fs/xfs/libxfs/xfs_bmap.c

index ce41d7f,e7ec8cc..932381c
--- 1/fs/xfs/libxfs/xfs_bmap.c
--- 2/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@@ -1121,15 -1121,14 +1121,14 @@@ xfs_bmap_add_attrfork
   
         mp = ip->i_mount;
         ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
-       tp = xfs_trans_alloc(mp, XFS_TRANS_ADDAFORK);
+ 
         blks = XFS_ADDAFORK_SPACE_RES(mp);
-       if (rsvd)
-               tp->t_flags |= XFS_TRANS_RESERVE;
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+ 
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_addafork, blks, 0,
+                       rsvd ? XFS_TRANS_RESERVE : 0, &tp);
+       if (error)
                 return error;
-       }
+ 
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
                         XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
@@@ -3742,11 -3741,11 +3741,11 @@@ xfs_bmap_btalloc
                 args.prod = align;
                 if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
                         args.mod = (xfs_extlen_t)(args.prod - args.mod);
- -      } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
+ +      } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
                 args.prod = 1;
                 args.mod = 0;
         } else {
- -              args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
+ +              args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
                 if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
                         args.mod = (xfs_extlen_t)(args.prod - args.mod);
         }
@@@ -6026,13 -6025,10 +6025,10 @@@ xfs_bmap_split_extent
         xfs_fsblock_t           firstfsb;
         int                     error;
   
-       tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                       XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+                       XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+       if (error)
                 return error;
-       }
   
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --combined fs/xfs/xfs_aops.c

index c535887,1d6eca5..4c463b9
--- 1/fs/xfs/xfs_aops.c
--- 2/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@@ -84,23 -84,71 +84,71 @@@ xfs_find_bdev_for_inode
   }
   
   /*
-  * We're now finished for good with this ioend structure.
-  * Update the page state via the associated buffer_heads,
-  * release holds on the inode and bio, and finally free
-  * up memory.  Do not use the ioend after this.
+  * We're now finished for good with this page.  Update the page state via the
+  * associated buffer_heads, paying attention to the start and end offsets that
+  * we need to process on the page.
+  */
+ static void
+ xfs_finish_page_writeback(
+       struct inode            *inode,
+       struct bio_vec          *bvec,
+       int                     error)
+ {
+       unsigned int            end = bvec->bv_offset + bvec->bv_len - 1;
+       struct buffer_head      *head, *bh;
+       unsigned int            off = 0;
+ 
+       ASSERT(bvec->bv_offset < PAGE_SIZE);
+       ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
+       ASSERT(end < PAGE_SIZE);
+       ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);
+ 
+       bh = head = page_buffers(bvec->bv_page);
+ 
+       do {
+               if (off < bvec->bv_offset)
+                       goto next_bh;
+               if (off > end)
+                       break;
+               bh->b_end_io(bh, !error);
+ next_bh:
+               off += bh->b_size;
+       } while ((bh = bh->b_this_page) != head);
+ }
+ 
+ /*
+  * We're now finished for good with this ioend structure.  Update the page
+  * state, release holds on bios, and finally free up memory.  Do not use the
+  * ioend after this.
    */
   STATIC void
   xfs_destroy_ioend(
-       xfs_ioend_t             *ioend)
+       struct xfs_ioend        *ioend,
+       int                     error)
   {
-       struct buffer_head      *bh, *next;
+       struct inode            *inode = ioend->io_inode;
+       struct bio              *last = ioend->io_bio;
+       struct bio              *bio, *next;
   
-       for (bh = ioend->io_buffer_head; bh; bh = next) {
-               next = bh->b_private;
-               bh->b_end_io(bh, !ioend->io_error);
-       }
+       for (bio = &ioend->io_inline_bio; bio; bio = next) {
+               struct bio_vec  *bvec;
+               int             i;
+ 
+               /*
+                * For the last bio, bi_private points to the ioend, so we
+                * need to explicitly end the iteration here.
+                */
+               if (bio == last)
+                       next = NULL;
+               else
+                       next = bio->bi_private;
   
-       mempool_free(ioend, xfs_ioend_pool);
+               /* walk each page on bio, ending page IO on them */
+               bio_for_each_segment_all(bvec, bio, i)
+                       xfs_finish_page_writeback(inode, bvec, error);
+ 
+               bio_put(bio);
+       }
   }
   
   /*
@@@ -120,13 -168,9 +168,9 @@@ xfs_setfilesize_trans_alloc
         struct xfs_trans        *tp;
         int                     error;
   
-       tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- 
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
+       if (error)
                 return error;
-       }
   
         ioend->io_append_trans = tp;
   
@@@ -174,7 -218,8 +218,8 @@@ xfs_setfilesize
   
   STATIC int
   xfs_setfilesize_ioend(
-       struct xfs_ioend        *ioend)
+       struct xfs_ioend        *ioend,
+       int                     error)
   {
         struct xfs_inode        *ip = XFS_I(ioend->io_inode);
         struct xfs_trans        *tp = ioend->io_append_trans;
@@@ -188,36 -233,14 +233,14 @@@
         __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
   
         /* we abort the update if there was an IO error */
-       if (ioend->io_error) {
+       if (error) {
                 xfs_trans_cancel(tp);
-               return ioend->io_error;
+               return error;
         }
   
         return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
   }
   
- /*
-  * Schedule IO completion handling on the final put of an ioend.
-  *
-  * If there is no work to do we might as well call it a day and free the
-  * ioend right now.
-  */
- STATIC void
- xfs_finish_ioend(
-       struct xfs_ioend        *ioend)
- {
-       if (atomic_dec_and_test(&ioend->io_remaining)) {
-               struct xfs_mount        *mp = XFS_I(ioend->io_inode)->i_mount;
- 
-               if (ioend->io_type == XFS_IO_UNWRITTEN)
-                       queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
-               else if (ioend->io_append_trans)
-                       queue_work(mp->m_data_workqueue, &ioend->io_work);
-               else
-                       xfs_destroy_ioend(ioend);
-       }
- }
- 
   /*
    * IO write completion.
    */
@@@ -225,16 -248,17 +248,17 @@@ STATIC voi
   xfs_end_io(
         struct work_struct *work)
   {
-       xfs_ioend_t     *ioend = container_of(work, xfs_ioend_t, io_work);
-       struct xfs_inode *ip = XFS_I(ioend->io_inode);
-       int             error = 0;
+       struct xfs_ioend        *ioend =
+               container_of(work, struct xfs_ioend, io_work);
+       struct xfs_inode        *ip = XFS_I(ioend->io_inode);
+       int                     error = ioend->io_bio->bi_error;
   
         /*
          * Set an error if the mount has shut down and proceed with end I/O
          * processing so it can perform whatever cleanups are necessary.
          */
         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               ioend->io_error = -EIO;
+               error = -EIO;
   
         /*
          * For unwritten extents we need to issue transactions to convert a
@@@ -244,55 -268,33 +268,33 @@@
          * on error.
          */
         if (ioend->io_type == XFS_IO_UNWRITTEN) {
-               if (ioend->io_error)
+               if (error)
                         goto done;
                 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
                                                   ioend->io_size);
         } else if (ioend->io_append_trans) {
-               error = xfs_setfilesize_ioend(ioend);
+               error = xfs_setfilesize_ioend(ioend, error);
         } else {
                 ASSERT(!xfs_ioend_is_append(ioend));
         }
   
   done:
-       if (error)
-               ioend->io_error = error;
-       xfs_destroy_ioend(ioend);
+       xfs_destroy_ioend(ioend, error);
   }
   
- /*
-  * Allocate and initialise an IO completion structure.
-  * We need to track unwritten extent write completion here initially.
-  * We'll need to extend this for updating the ondisk inode size later
-  * (vs. incore size).
-  */
- STATIC xfs_ioend_t *
- xfs_alloc_ioend(
-       struct inode            *inode,
-       unsigned int            type)
+ STATIC void
+ xfs_end_bio(
+       struct bio              *bio)
   {
-       xfs_ioend_t             *ioend;
- 
-       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
- 
-       /*
-        * Set the count to 1 initially, which will prevent an I/O
-        * completion callback from happening before we have started
-        * all the I/O from calling the completion routine too early.
-        */
-       atomic_set(&ioend->io_remaining, 1);
-       ioend->io_error = 0;
-       INIT_LIST_HEAD(&ioend->io_list);
-       ioend->io_type = type;
-       ioend->io_inode = inode;
-       ioend->io_buffer_head = NULL;
-       ioend->io_buffer_tail = NULL;
-       ioend->io_offset = 0;
-       ioend->io_size = 0;
-       ioend->io_append_trans = NULL;
+       struct xfs_ioend        *ioend = bio->bi_private;
+       struct xfs_mount        *mp = XFS_I(ioend->io_inode)->i_mount;
   
-       INIT_WORK(&ioend->io_work, xfs_end_io);
-       return ioend;
+       if (ioend->io_type == XFS_IO_UNWRITTEN)
+               queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
+       else if (ioend->io_append_trans)
+               queue_work(mp->m_data_workqueue, &ioend->io_work);
+       else
+               xfs_destroy_ioend(ioend, bio->bi_error);
   }
   
   STATIC int
@@@ -364,50 -366,6 +366,6 @@@ xfs_imap_valid
                 offset < imap->br_startoff + imap->br_blockcount;
   }
   
- /*
-  * BIO completion handler for buffered IO.
-  */
- STATIC void
- xfs_end_bio(
-       struct bio              *bio)
- {
-       xfs_ioend_t             *ioend = bio->bi_private;
- 
-       if (!ioend->io_error)
-               ioend->io_error = bio->bi_error;
- 
-       /* Toss bio and pass work off to an xfsdatad thread */
-       bio->bi_private = NULL;
-       bio->bi_end_io = NULL;
-       bio_put(bio);
- 
-       xfs_finish_ioend(ioend);
- }
- 
- STATIC void
- xfs_submit_ioend_bio(
-       struct writeback_control *wbc,
-       xfs_ioend_t             *ioend,
-       struct bio              *bio)
- {
-       atomic_inc(&ioend->io_remaining);
-       bio->bi_private = ioend;
-       bio->bi_end_io = xfs_end_bio;
-       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
- }
- 
- STATIC struct bio *
- xfs_alloc_ioend_bio(
-       struct buffer_head      *bh)
- {
-       struct bio              *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
- 
-       ASSERT(bio->bi_private == NULL);
-       bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
-       bio->bi_bdev = bh->b_bdev;
-       return bio;
- }
- 
   STATIC void
   xfs_start_buffer_writeback(
         struct buffer_head      *bh)
@@@ -452,28 -410,35 +410,35 @@@ static inline int xfs_bio_add_buffer(st
   }
   
   /*
-  * Submit all of the bios for an ioend. We are only passed a single ioend at a
-  * time; the caller is responsible for chaining prior to submission.
+  * Submit the bio for an ioend. We are passed an ioend with a bio attached to
+  * it, and we submit that bio. The ioend may be used for multiple bio
+  * submissions, so we only want to allocate an append transaction for the ioend
+  * once. In the case of multiple bio submission, each bio will take an IO
+  * reference to the ioend to ensure that the ioend completion is only done once
+  * all bios have been submitted and the ioend is really done.
    *
    * If @status is non-zero, it means that we have a situation where some part of
    * the submission process has failed after we have marked pages for writeback
-  * and unlocked them. In this situation, we need to fail the ioend chain rather
-  * than submit it to IO. This typically only happens on a filesystem shutdown.
+  * and unlocked them. In this situation, we need to fail the bio and ioend
+  * rather than submit it to IO. This typically only happens on a filesystem
+  * shutdown.
    */
   STATIC int
   xfs_submit_ioend(
         struct writeback_control *wbc,
-       xfs_ioend_t             *ioend,
+       struct xfs_ioend        *ioend,
         int                     status)
   {
-       struct buffer_head      *bh;
-       struct bio              *bio;
-       sector_t                lastblock = 0;
- 
         /* Reserve log space if we might write beyond the on-disk inode size. */
         if (!status &&
-            ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+           ioend->io_type != XFS_IO_UNWRITTEN &&
+           xfs_ioend_is_append(ioend) &&
+           !ioend->io_append_trans)
                 status = xfs_setfilesize_trans_alloc(ioend);
+ 
+       ioend->io_bio->bi_private = ioend;
+       ioend->io_bio->bi_end_io = xfs_end_bio;
+ 
         /*
          * If we are failing the IO now, just mark the ioend with an
          * error and finish it. This will run IO completion immediately
@@@ -481,33 -446,73 +446,73 @@@
          * time.
          */
         if (status) {
-               ioend->io_error = status;
-               xfs_finish_ioend(ioend);
+               ioend->io_bio->bi_error = status;
+               bio_endio(ioend->io_bio);
                 return status;
         }
   
-       bio = NULL;
-       for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
+       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
+                  ioend->io_bio);
+       return 0;
+ }
   
-               if (!bio) {
- retry:
-                       bio = xfs_alloc_ioend_bio(bh);
-               } else if (bh->b_blocknr != lastblock + 1) {
-                       xfs_submit_ioend_bio(wbc, ioend, bio);
-                       goto retry;
-               }
+ static void
+ xfs_init_bio_from_bh(
+       struct bio              *bio,
+       struct buffer_head      *bh)
+ {
+       bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);	/* start of @bh in 512-byte units */
+       bio->bi_bdev = bh->b_bdev;	/* target the same block device as @bh */
+ }
   
-               if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
-                       xfs_submit_ioend_bio(wbc, ioend, bio);
-                       goto retry;
-               }
+ static struct xfs_ioend *
+ xfs_alloc_ioend(
+       struct inode            *inode,
+       unsigned int            type,
+       xfs_off_t               offset,
+       struct buffer_head      *bh)
+ {
+       struct xfs_ioend        *ioend;
+       struct bio              *bio;
   
-               lastblock = bh->b_blocknr;
-       }
-       if (bio)
-               xfs_submit_ioend_bio(wbc, ioend, bio);
-       xfs_finish_ioend(ioend);
-       return 0;
+       bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
+       xfs_init_bio_from_bh(bio, bh);
+ 
+       ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
+       INIT_LIST_HEAD(&ioend->io_list);
+       ioend->io_type = type;
+       ioend->io_inode = inode;
+       ioend->io_size = 0;
+       ioend->io_offset = offset;
+       INIT_WORK(&ioend->io_work, xfs_end_io);
+       ioend->io_append_trans = NULL;
+       ioend->io_bio = bio;
+       return ioend;
+ }
+ 
+ /*
+  * Allocate a new bio, and chain the old bio to the new one.
+  *
+  * The old bio (ioend->io_bio on entry) is submitted here, using WRITE_SYNC
+  * when wbc->sync_mode is WB_SYNC_ALL and WRITE otherwise.  On return
+  * ioend->io_bio points at the new bio, which has been initialised from @bh
+  * by xfs_init_bio_from_bh() but has not been submitted.
+  *
+  * Note that we have to perform the chaining in this unintuitive order
+  * so that the bi_private linkage is set up in the right direction for the
+  * traversal in xfs_destroy_ioend(), which starts at the ioend's inline bio
+  * and follows bi_private from bio to bio until it reaches ioend->io_bio.
+  */
+ static void
+ xfs_chain_bio(
+       struct xfs_ioend        *ioend,
+       struct writeback_control *wbc,
+       struct buffer_head      *bh)
+ {
+       struct bio *new;
+ 
+       new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+       xfs_init_bio_from_bh(new, bh);
+ 
+       bio_chain(ioend->io_bio, new);
+       bio_get(ioend->io_bio);         /* extra ref, dropped by bio_put() in xfs_destroy_ioend */
+       submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE,
+                  ioend->io_bio);
+       ioend->io_bio = new;	/* the ioend now tracks the new, unsubmitted bio */
   }
   
   /*
@@@ -523,27 -528,24 +528,24 @@@ xfs_add_to_ioend
         struct buffer_head      *bh,
         xfs_off_t               offset,
         struct xfs_writepage_ctx *wpc,
+       struct writeback_control *wbc,
         struct list_head        *iolist)
   {
         if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
             bh->b_blocknr != wpc->last_block + 1 ||
             offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
-               struct xfs_ioend        *new;
- 
                 if (wpc->ioend)
                         list_add(&wpc->ioend->io_list, iolist);
- 
-               new = xfs_alloc_ioend(inode, wpc->io_type);
-               new->io_offset = offset;
-               new->io_buffer_head = bh;
-               new->io_buffer_tail = bh;
-               wpc->ioend = new;
-       } else {
-               wpc->ioend->io_buffer_tail->b_private = bh;
-               wpc->ioend->io_buffer_tail = bh;
+               wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
         }
   
-       bh->b_private = NULL;
+       /*
+        * If the buffer doesn't fit into the bio we need to allocate a new
+        * one.  This shouldn't happen more than once for a given buffer.
+        */
+       while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
+               xfs_chain_bio(wpc->ioend, wbc, bh);
+ 
         wpc->ioend->io_size += bh->b_size;
         wpc->last_block = bh->b_blocknr;
         xfs_start_buffer_writeback(bh);
@@@ -704,7 -706,7 +706,7 @@@ next_buffer
   
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
   out_invalidate:
- -      xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ +      xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
         return;
   }
   
@@@ -803,7 -805,7 +805,7 @@@ xfs_writepage_map
                         lock_buffer(bh);
                         if (wpc->io_type != XFS_IO_OVERWRITE)
                                 xfs_map_at_offset(inode, bh, &wpc->imap, offset);
-                       xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list);
+                       xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
                         count++;
                 }
   
@@@ -925,9 -927,9 +927,9 @@@ xfs_do_writepage
          * ---------------------------------^------------------|
          */
         offset = i_size_read(inode);
- -      end_index = offset >> PAGE_CACHE_SHIFT;
+ +      end_index = offset >> PAGE_SHIFT;
         if (page->index < end_index)
- -              end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+ +              end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
         else {
                 /*
                  * Check whether the page to write out is beyond or straddles
@@@ -940,7 -942,7 +942,7 @@@
                  * |                                |      Straddles     |
                  * ---------------------------------^-----------|--------|
                  */
- -              unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
+ +              unsigned offset_into_page = offset & (PAGE_SIZE - 1);
   
                 /*
                  * Skip the page if it is fully outside i_size, e.g. due to a
@@@ -971,7 -973,7 +973,7 @@@
                  * memory is zeroed when mapped, and writes to that region are
                  * not written out to the file."
                  */
- -              zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+ +              zero_user_segment(page, offset_into_page, PAGE_SIZE);
   
                 /* Adjust the end_offset to the end of file */
                 end_offset = offset;
@@@ -1391,13 -1393,10 +1393,10 @@@ xfs_end_io_direct_write
   
                 trace_xfs_end_io_direct_write_append(ip, offset, size);
   
-               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp);
-                       return error;
-               }
-               error = xfs_setfilesize(ip, tp, offset, size);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0,
+                               &tp);
+               if (!error)
+                       error = xfs_setfilesize(ip, tp, offset, size);
         }
   
         return error;
@@@ -1406,7 -1405,8 +1405,7 @@@
   STATIC ssize_t
   xfs_vm_direct_IO(
         struct kiocb            *iocb,
- -      struct iov_iter         *iter,
- -      loff_t                  offset)
+ +      struct iov_iter         *iter)
   {
         struct inode            *inode = iocb->ki_filp->f_mapping->host;
         dio_iodone_t            *endio = NULL;
@@@ -1419,12 -1419,12 +1418,12 @@@
         }
   
         if (IS_DAX(inode)) {
- -              return dax_do_io(iocb, inode, iter, offset,
+ +              return dax_do_io(iocb, inode, iter,
                                  xfs_get_blocks_direct, endio, 0);
         }
   
         bdev = xfs_find_bdev_for_inode(inode);
- -      return  __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
+ +      return  __blockdev_direct_IO(iocb, inode, bdev, iter,
                         xfs_get_blocks_direct, endio, NULL, flags);
   }
   
@@@ -1474,7 -1474,7 +1473,7 @@@ xfs_vm_write_failed
         loff_t                  block_offset;
         loff_t                  block_start;
         loff_t                  block_end;
- -      loff_t                  from = pos & (PAGE_CACHE_SIZE - 1);
+ +      loff_t                  from = pos & (PAGE_SIZE - 1);
         loff_t                  to = from + len;
         struct buffer_head      *bh, *head;
         struct xfs_mount        *mp = XFS_I(inode)->i_mount;
@@@ -1490,7 -1490,7 +1489,7 @@@
          * start of the page by using shifts rather than masks the mismatch
          * problem.
          */
- -      block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT;
+ +      block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
   
         ASSERT(block_offset + from == pos);
   
@@@ -1557,12 -1557,12 +1556,12 @@@ xfs_vm_write_begin
         struct page             **pagep,
         void                    **fsdata)
   {
- -      pgoff_t                 index = pos >> PAGE_CACHE_SHIFT;
+ +      pgoff_t                 index = pos >> PAGE_SHIFT;
         struct page             *page;
         int                     status;
         struct xfs_mount        *mp = XFS_I(mapping->host)->i_mount;
   
- -      ASSERT(len <= PAGE_CACHE_SIZE);
+ +      ASSERT(len <= PAGE_SIZE);
   
         page = grab_cache_page_write_begin(mapping, index, flags);
         if (!page)
@@@ -1591,7 -1591,7 +1590,7 @@@
                         truncate_pagecache_range(inode, start, pos + len);
                 }
   
- -              page_cache_release(page);
+ +              put_page(page);
                 page = NULL;
         }
   
@@@ -1619,7 -1619,7 +1618,7 @@@ xfs_vm_write_end
   {
         int                     ret;
   
- -      ASSERT(len <= PAGE_CACHE_SIZE);
+ +      ASSERT(len <= PAGE_SIZE);
   
         ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
         if (unlikely(ret < len)) {
diff --combined fs/xfs/xfs_bmap_util.c

index 3b63098,3246ebc..613ea2d
--- 1/fs/xfs/xfs_bmap_util.c
--- 2/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@@ -900,19 -900,15 +900,15 @@@ xfs_free_eofblocks
                  * Free them up now by truncating the file to
                  * its current size.
                  */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- 
                 if (need_iolock) {
-                       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
-                               xfs_trans_cancel(tp);
+                       if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
                                 return -EAGAIN;
-                       }
                 }
   
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
+                               &tp);
                 if (error) {
                         ASSERT(XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp);
                         if (need_iolock)
                                 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
                         return error;
@@@ -1037,9 -1033,9 +1033,9 @@@ xfs_alloc_file_space
                 /*
                  * Allocate and setup the transaction.
                  */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                                         resblks, resrtextents);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
+                               resrtextents, 0, &tp);
+ 
                 /*
                  * Check for running out of space
                  */
@@@ -1048,7 -1044,6 +1044,6 @@@
                          * Free the transaction structure.
                          */
                         ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp);
                         break;
                 }
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@@ -1237,7 -1232,7 +1232,7 @@@ xfs_free_file_space
         /* wait for the completion of any pending DIOs */
         inode_dio_wait(VFS_I(ip));
   
- -      rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+ +      rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
         ioffset = round_down(offset, rounding);
         iendoffset = round_up(offset + len, rounding) - 1;
         error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
@@@ -1311,18 -1306,10 +1306,10 @@@
                  * transaction to dip into the reserve blocks to ensure
                  * the freeing of the space succeeds at ENOSPC.
                  */
-               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
- 
-               /*
-                * check for running out of space
-                */
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
+                               &tp);
                 if (error) {
-                       /*
-                        * Free the transaction structure.
-                        */
                         ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-                       xfs_trans_cancel(tp);
                         break;
                 }
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
@@@ -1466,7 -1453,7 +1453,7 @@@ xfs_shift_file_space
         if (error)
                 return error;
         error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
- -                                      offset >> PAGE_CACHE_SHIFT, -1);
+ +                                      offset >> PAGE_SHIFT, -1);
         if (error)
                 return error;
   
@@@ -1482,19 -1469,16 +1469,16 @@@
         }
   
         while (!error && !done) {
-               tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
                 /*
                  * We need to reserve a permanent block for the transaction.
                  * This comes into the picture when, after shifting an extent
                  * into a hole, we find that adjacent extents can be merged,
                  * which may lead to freeing a block during the record update.
                  */
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
-                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
-               if (error) {
-                       xfs_trans_cancel(tp);
+               error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
+                               XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
+               if (error)
                         break;
-               }
   
                 xfs_ilock(ip, XFS_ILOCK_EXCL);
                 error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
@@@ -1747,12 -1731,9 +1731,9 @@@ xfs_swap_extents
         if (error)
                 goto out_unlock;
   
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       if (error)
                 goto out_unlock;
-       }
   
         /*
          * Lock and join the inodes to the transaction so that transaction commit
diff --combined fs/xfs/xfs_file.c

index 85ce303,98bbd8f..44af228
--- 1/fs/xfs/xfs_file.c
--- 2/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@@ -106,8 -106,8 +106,8 @@@ xfs_iozero
                 unsigned offset, bytes;
                 void *fsdata;
   
- -              offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
- -              bytes = PAGE_CACHE_SIZE - offset;
+ +              offset = (pos & (PAGE_SIZE -1)); /* Within page */
+ +              bytes = PAGE_SIZE - offset;
                 if (bytes > count)
                         bytes = count;
   
@@@ -145,12 -145,10 +145,10 @@@ xfs_update_prealloc_flags
         struct xfs_trans        *tp;
         int                     error;
   
-       tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
-       error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
+                       0, 0, 0, &tp);
+       if (error)
                 return error;
-       }
   
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
@@@ -718,19 -716,18 +716,19 @@@ xfs_file_dio_aio_write
         int                     unaligned_io = 0;
         int                     iolock;
         size_t                  count = iov_iter_count(from);
- -      loff_t                  pos = iocb->ki_pos;
         loff_t                  end;
         struct iov_iter         data;
         struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                         mp->m_rtdev_targp : mp->m_ddev_targp;
   
         /* DIO must be aligned to device logical sector size */
- -      if (!IS_DAX(inode) && ((pos | count) & target->bt_logical_sectormask))
+ +      if (!IS_DAX(inode) &&
+ +          ((iocb->ki_pos | count) & target->bt_logical_sectormask))
                 return -EINVAL;
   
         /* "unaligned" here means not aligned to a filesystem block */
- -      if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
+ +      if ((iocb->ki_pos & mp->m_blockmask) ||
+ +          ((iocb->ki_pos + count) & mp->m_blockmask))
                 unaligned_io = 1;
   
         /*
@@@ -761,7 -758,8 +759,7 @@@
         if (ret)
                 goto out;
         count = iov_iter_count(from);
- -      pos = iocb->ki_pos;
- -      end = pos + count - 1;
+ +      end = iocb->ki_pos + count - 1;
   
         /*
          * See xfs_file_read_iter() for why we do a full-file flush here.
@@@ -794,18 -792,19 +792,18 @@@
         trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
   
         data = *from;
- -      ret = mapping->a_ops->direct_IO(iocb, &data, pos);
+ +      ret = mapping->a_ops->direct_IO(iocb, &data);
   
         /* see generic_file_direct_write() for why this is necessary */
         if (mapping->nrpages) {
                 invalidate_inode_pages2_range(mapping,
- -                                            pos >> PAGE_CACHE_SHIFT,
- -                                            end >> PAGE_CACHE_SHIFT);
+ +                                            iocb->ki_pos >> PAGE_SHIFT,
+ +                                            end >> PAGE_SHIFT);
         }
   
         if (ret > 0) {
- -              pos += ret;
+ +              iocb->ki_pos += ret;
                 iov_iter_advance(from, ret);
- -              iocb->ki_pos = pos;
         }
   out:
         xfs_rw_iunlock(ip, iolock);
@@@ -903,10 -902,14 +901,10 @@@ xfs_file_write_iter
                 ret = xfs_file_buffered_aio_write(iocb, from);
   
         if (ret > 0) {
- -              ssize_t err;
- -
                 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
   
                 /* Handle various SYNC-type writes */
- -              err = generic_write_sync(file, iocb->ki_pos - ret, ret);
- -              if (err < 0)
- -                      ret = err;
+ +              ret = generic_write_sync(iocb, ret);
         }
         return ret;
   }
@@@ -1202,9 -1205,9 +1200,9 @@@ xfs_find_get_desired_pgoff
   
         pagevec_init(&pvec, 0);
   
- -      index = startoff >> PAGE_CACHE_SHIFT;
+ +      index = startoff >> PAGE_SHIFT;
         endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
- -      end = endoff >> PAGE_CACHE_SHIFT;
+ +      end = endoff >> PAGE_SHIFT;
         do {
                 int             want;
                 unsigned        nr_pages;
@@@ -1709,7 -1712,7 +1707,7 @@@ const struct file_operations xfs_file_o
   const struct file_operations xfs_dir_file_operations = {
         .open           = xfs_dir_open,
         .read           = generic_read_dir,
- -      .iterate        = xfs_file_readdir,
+ +      .iterate_shared = xfs_file_readdir,
         .llseek         = generic_file_llseek,
         .unlocked_ioctl = xfs_file_ioctl,
   #ifdef CONFIG_COMPAT
diff --combined fs/xfs/xfs_mount.c

index cfd4210,3b67b14..e39b023
--- 1/fs/xfs/xfs_mount.c
--- 2/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@@ -89,7 -89,6 +89,6 @@@ xfs_uuid_mount
         if (hole < 0) {
                 xfs_uuid_table = kmem_realloc(xfs_uuid_table,
                         (xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
-                       xfs_uuid_table_size  * sizeof(*xfs_uuid_table),
                         KM_SLEEP);
                 hole = xfs_uuid_table_size++;
         }
@@@ -171,7 -170,7 +170,7 @@@ xfs_sb_validate_fsb_count
         ASSERT(sbp->sb_blocklog >= BBSHIFT);
   
         /* Limited by ULONG_MAX of page cache index */
- -      if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
+ +      if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
                 return -EFBIG;
         return 0;
   }
@@@ -681,6 -680,9 +680,9 @@@ xfs_mountfs
   
         xfs_set_maxicount(mp);
   
+       /* enable fail_at_unmount as default */
+       mp->m_fail_unmount = 1;
+ 
         error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
         if (error)
                 goto out;
@@@ -690,10 -692,15 +692,15 @@@
         if (error)
                 goto out_remove_sysfs;
   
-       error = xfs_uuid_mount(mp);
+       error = xfs_error_sysfs_init(mp);
         if (error)
                 goto out_del_stats;
   
+ 
+       error = xfs_uuid_mount(mp);
+       if (error)
+               goto out_remove_error_sysfs;
+ 
         /*
          * Set the minimum read and write sizes
          */
@@@ -957,6 -964,7 +964,7 @@@
         cancel_delayed_work_sync(&mp->m_reclaim_work);
         xfs_reclaim_inodes(mp, SYNC_WAIT);
    out_log_dealloc:
+       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
         xfs_log_mount_cancel(mp);
    out_fail_wait:
         if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@@ -968,6 -976,8 +976,8 @@@
         xfs_da_unmount(mp);
    out_remove_uuid:
         xfs_uuid_unmount(mp);
+  out_remove_error_sysfs:
+       xfs_error_sysfs_del(mp);
    out_del_stats:
         xfs_sysfs_del(&mp->m_stats.xs_kobj);
    out_remove_sysfs:
@@@ -1005,6 -1015,14 +1015,14 @@@ xfs_unmountfs
          */
         xfs_log_force(mp, XFS_LOG_SYNC);
   
+       /*
+        * We now need to tell the world we are unmounting. This will allow
+        * us to detect that the filesystem is going away and we should error
+        * out anything that we have been retrying in the background. This will
+        * prevent neverending retries in AIL pushing from hanging the unmount.
+        */
+       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+ 
         /*
          * Flush all pending changes from the AIL.
          */
@@@ -1056,6 -1074,7 +1074,7 @@@
   #endif
         xfs_free_perag(mp);
   
+       xfs_error_sysfs_del(mp);
         xfs_sysfs_del(&mp->m_stats.xs_kobj);
         xfs_sysfs_del(&mp->m_kobj);
   }
diff --combined fs/xfs/xfs_mount.h

index eafe257,9063a9c..c1b798c
--- 1/fs/xfs/xfs_mount.h
--- 2/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@@ -37,6 -37,32 +37,32 @@@ enum 
         XFS_LOWSP_MAX,
   };
   
+ /*
+  * Error Configuration
+  *
+  * Error classes define the subsystem the configuration belongs to.
+  * Error numbers define the errors that are configurable.
+  */
+ enum {
+       XFS_ERR_METADATA,
+       XFS_ERR_CLASS_MAX,
+ };
+ enum {
+       XFS_ERR_DEFAULT,
+       XFS_ERR_EIO,
+       XFS_ERR_ENOSPC,
+       XFS_ERR_ENODEV,
+       XFS_ERR_ERRNO_MAX,
+ };
+ 
+ #define XFS_ERR_RETRY_FOREVER -1
+ 
+ struct xfs_error_cfg {
+       struct xfs_kobj kobj;
+       int             max_retries;
+       unsigned long   retry_timeout;  /* in jiffies, 0 = no timeout */
+ };
+ 
   typedef struct xfs_mount {
         struct super_block      *m_super;
         xfs_tid_t               m_tid;          /* next unused tid for fs */
@@@ -127,6 -153,9 +153,9 @@@
         int64_t                 m_low_space[XFS_LOWSP_MAX];
                                                 /* low free space thresholds */
         struct xfs_kobj         m_kobj;
+       struct xfs_kobj         m_error_kobj;
+       struct xfs_kobj         m_error_meta_kobj;
+       struct xfs_error_cfg    m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
         struct xstats           m_stats;        /* per-fs stats */
   
         struct workqueue_struct *m_buf_workqueue;
@@@ -148,6 -177,7 +177,7 @@@
          */
         __uint32_t              m_generation;
   
+       bool                    m_fail_unmount;
   #ifdef DEBUG
         /*
          * DEBUG mode instrumentation to test and/or trigger delayed allocation
@@@ -166,6 -196,7 +196,7 @@@
   #define XFS_MOUNT_WSYNC               (1ULL << 0)     /* for nfs - all metadata ops
                                                    must be synchronous except
                                                    for space allocations */
+ #define XFS_MOUNT_UNMOUNTING  (1ULL << 1)     /* filesystem is unmounting */
   #define XFS_MOUNT_WAS_CLEAN   (1ULL << 3)
   #define XFS_MOUNT_FS_SHUTDOWN (1ULL << 4)     /* atomic stop of all filesystem
                                                    operations, typically for
@@@ -231,12 -262,12 +262,12 @@@ static inline unsigned lon
   xfs_preferred_iosize(xfs_mount_t *mp)
   {
         if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
- -              return PAGE_CACHE_SIZE;
+ +              return PAGE_SIZE;
         return (mp->m_swidth ?
                 (mp->m_swidth << mp->m_sb.sb_blocklog) :
                 ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ?
                         (1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) :
- -                      PAGE_CACHE_SIZE));
+ +                      PAGE_SIZE));
   }
   
   #define XFS_LAST_UNMOUNT_WAS_CLEAN(mp)        \
@@@ -364,4 -395,7 +395,7 @@@ extern void        xfs_set_low_space_threshold
   int   xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
                         xfs_off_t count_fsb);
   
+ struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
+               int error_class, int error);
+ 
   #endif        /* __XFS_MOUNT_H__ */
diff --combined fs/xfs/xfs_pnfs.c

index 51ddaf2,3332bae..d5b7566
--- 1/fs/xfs/xfs_pnfs.c
--- 2/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@@ -293,8 -293,8 +293,8 @@@ xfs_fs_commit_blocks
                  * Make sure reads through the pagecache see the new data.
                  */
                 error = invalidate_inode_pages2_range(inode->i_mapping,
- -                                      start >> PAGE_CACHE_SHIFT,
- -                                      (end - 1) >> PAGE_CACHE_SHIFT);
+ +                                      start >> PAGE_SHIFT,
+ +                                      (end - 1) >> PAGE_SHIFT);
                 WARN_ON_ONCE(error);
   
                 error = xfs_iomap_write_unwritten(ip, start, length);
@@@ -308,12 -308,9 +308,9 @@@
                         goto out_drop_iolock;
         }
   
-       tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
-       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp);
+       error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+       if (error)
                 goto out_drop_iolock;
-       }
   
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
diff --combined fs/xfs/xfs_super.c

index 187e14b,d8424f5..416421d
--- 1/fs/xfs/xfs_super.c
--- 2/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@@ -58,8 -58,7 +58,7 @@@
   #include <linux/parser.h>
   
   static const struct super_operations xfs_super_operations;
- static kmem_zone_t *xfs_ioend_zone;
- mempool_t *xfs_ioend_pool;
+ struct bio_set *xfs_ioend_bioset;
   
   static struct kset *xfs_kset;         /* top-level xfs sysfs dir */
   #ifdef DEBUG
@@@ -350,6 -349,7 +349,7 @@@ xfs_parseargs
                 case Opt_pqnoenforce:
                         mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
                         mp->m_qflags &= ~XFS_PQUOTA_ENFD;
+                       break;
                 case Opt_gquota:
                 case Opt_grpquota:
                         mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
@@@ -556,10 -556,10 +556,10 @@@ xfs_max_file_offset
         /* Figure out maximum filesize, on Linux this can depend on
          * the filesystem blocksize (on 32 bit platforms).
          * __block_write_begin does this in an [unsigned] long...
- -       *      page->index << (PAGE_CACHE_SHIFT - bbits)
+ +       *      page->index << (PAGE_SHIFT - bbits)
          * So, for page sized blocks (4K on 32 bit platforms),
          * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
- -       *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+ +       *      (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
          * but for smaller blocksizes it is less (bbits = log2 bsize).
          * Note1: get_block_t takes a long (implicit cast from above)
          * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
@@@ -570,10 -570,10 +570,10 @@@
   #if BITS_PER_LONG == 32
   # if defined(CONFIG_LBDAF)
         ASSERT(sizeof(sector_t) == 8);
- -      pagefactor = PAGE_CACHE_SIZE;
+ +      pagefactor = PAGE_SIZE;
         bitshift = BITS_PER_LONG;
   # else
- -      pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+ +      pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
   # endif
   #endif
   
@@@ -928,7 -928,7 +928,7 @@@ xfs_fs_alloc_inode
   
   /*
    * Now that the generic code is guaranteed not to be accessing
-  * the linux inode, we can reclaim the inode.
+  * the linux inode, we can inactivate and reclaim the inode.
    */
   STATIC void
   xfs_fs_destroy_inode(
@@@ -938,9 -938,14 +938,14 @@@
   
         trace_xfs_destroy_inode(ip);
   
-       XFS_STATS_INC(ip->i_mount, vn_reclaim);
+       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+       XFS_STATS_INC(ip->i_mount, vn_rele);
+       XFS_STATS_INC(ip->i_mount, vn_remove);
+ 
+       xfs_inactive(ip);
   
         ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+       XFS_STATS_INC(ip->i_mount, vn_reclaim);
   
         /*
          * We should never get here with one of the reclaim flags already set.
@@@ -987,24 -992,6 +992,6 @@@ xfs_fs_inode_init_once
                      "xfsino", ip->i_ino);
   }
   
- STATIC void
- xfs_fs_evict_inode(
-       struct inode            *inode)
- {
-       xfs_inode_t             *ip = XFS_I(inode);
- 
-       ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
- 
-       trace_xfs_evict_inode(ip);
- 
-       truncate_inode_pages_final(&inode->i_data);
-       clear_inode(inode);
-       XFS_STATS_INC(ip->i_mount, vn_rele);
-       XFS_STATS_INC(ip->i_mount, vn_remove);
- 
-       xfs_inactive(ip);
- }
- 
   /*
    * We do an unlocked check for XFS_IDONTCACHE here because we are already
    * serialised against cache hits here via the inode->i_lock and igrab() in
@@@ -1276,6 -1263,16 +1263,16 @@@ xfs_fs_remount
                         return -EINVAL;
                 }
   
+               if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+                   xfs_sb_has_ro_compat_feature(sbp,
+                                       XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+                       xfs_warn(mp,
+ "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
+                               (sbp->sb_features_ro_compat &
+                                       XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+                       return -EINVAL;
+               }
+ 
                 mp->m_flags &= ~XFS_MOUNT_RDONLY;
   
                 /*
@@@ -1663,7 -1660,6 +1660,6 @@@ xfs_fs_free_cached_objects
   static const struct super_operations xfs_super_operations = {
         .alloc_inode            = xfs_fs_alloc_inode,
         .destroy_inode          = xfs_fs_destroy_inode,
-       .evict_inode            = xfs_fs_evict_inode,
         .drop_inode             = xfs_fs_drop_inode,
         .put_super              = xfs_fs_put_super,
         .sync_fs                = xfs_fs_sync_fs,
@@@ -1688,20 -1684,15 +1684,15 @@@ MODULE_ALIAS_FS("xfs")
   STATIC int __init
   xfs_init_zones(void)
   {
- 
-       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
-       if (!xfs_ioend_zone)
+       xfs_ioend_bioset = bioset_create(4 * MAX_BUF_PER_PAGE,
+                       offsetof(struct xfs_ioend, io_inline_bio));
+       if (!xfs_ioend_bioset)
                 goto out;
   
-       xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
-                                                 xfs_ioend_zone);
-       if (!xfs_ioend_pool)
-               goto out_destroy_ioend_zone;
- 
         xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
                                                 "xfs_log_ticket");
         if (!xfs_log_ticket_zone)
-               goto out_destroy_ioend_pool;
+               goto out_free_ioend_bioset;
   
         xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
                                                 "xfs_bmap_free_item");
@@@ -1797,10 -1788,8 +1788,8 @@@
         kmem_zone_destroy(xfs_bmap_free_item_zone);
    out_destroy_log_ticket_zone:
         kmem_zone_destroy(xfs_log_ticket_zone);
-  out_destroy_ioend_pool:
-       mempool_destroy(xfs_ioend_pool);
-  out_destroy_ioend_zone:
-       kmem_zone_destroy(xfs_ioend_zone);
+  out_free_ioend_bioset:
+       bioset_free(xfs_ioend_bioset);
    out:
         return -ENOMEM;
   }
@@@ -1826,9 -1815,7 +1815,7 @@@ xfs_destroy_zones(void
         kmem_zone_destroy(xfs_btree_cur_zone);
         kmem_zone_destroy(xfs_bmap_free_item_zone);
         kmem_zone_destroy(xfs_log_ticket_zone);
-       mempool_destroy(xfs_ioend_pool);
-       kmem_zone_destroy(xfs_ioend_zone);
- 
+       bioset_free(xfs_ioend_bioset);
   }
   
   STATIC int __init
diff --combined fs/xfs/xfs_xattr.c

index d111f69,7fdcf33..ec58ff0
--- 1/fs/xfs/xfs_xattr.c
--- 2/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@@ -32,11 -32,11 +32,11 @@@
   
   
   static int
- -xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry,
- -              const char *name, void *value, size_t size)
+ +xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused,
+ +              struct inode *inode, const char *name, void *value, size_t size)
   {
         int xflags = handler->flags;
- -      struct xfs_inode *ip = XFS_I(d_inode(dentry));
+ +      struct xfs_inode *ip = XFS_I(inode);
         int error, asize = size;
   
         /* Convert Linux syscall to XFS internal ATTR flags */
@@@ -146,7 -146,7 +146,7 @@@ __xfs_xattr_put_listent
         arraytop = context->count + prefix_len + namelen + 1;
         if (arraytop > context->firstu) {
                 context->count = -1;    /* insufficient space */
-               return 1;
+               return 0;
         }
         offset = (char *)context->alist + context->count;
         strncpy(offset, prefix, prefix_len);
@@@ -166,8 -166,7 +166,7 @@@ xfs_xattr_put_listent
         int             flags,
         unsigned char   *name,
         int             namelen,
-       int             valuelen,
-       unsigned char   *value)
+       int             valuelen)
   {
         char *prefix;
         int prefix_len;
@@@ -221,11 -220,15 +220,15 @@@
   }
   
   ssize_t
- xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+ xfs_vn_listxattr(
+       struct dentry   *dentry,
+       char            *data,
+       size_t          size)
   {
         struct xfs_attr_list_context context;
         struct attrlist_cursor_kern cursor = { 0 };
-       struct inode            *inode = d_inode(dentry);
+       struct inode    *inode = d_inode(dentry);
+       int             error;
   
         /*
          * First read the regular on-disk attributes.
@@@ -239,7 -242,9 +242,9 @@@
         context.firstu = context.bufsize;
         context.put_listent = xfs_xattr_put_listent;
   
-       xfs_attr_list_int(&context);
+       error = xfs_attr_list_int(&context);
+       if (error)
+               return error;
         if (context.count < 0)
                 return -ERANGE;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 26 May 2016 17:13:40 +0000 (10:13 -0700)
		1	2
fs/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/libxfs/xfs_bmap.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_aops.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_bmap_util.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_file.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_mount.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_mount.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_pnfs.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/xfs/xfs_xattr.c	patch \|	diff1 \|	diff2 \|	blob \| history