Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Oct 2016 20:04:49 +0000 (13:04 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 10 Oct 2016 20:04:49 +0000 (13:04 -0700)
Pull misc vfs updates from Al Viro:
 "Assorted misc bits and pieces.

  There are several single-topic branches left after this (rename2
  series from Miklos, current_time series from Deepa Dinamani, xattr
  series from Andreas, uaccess stuff from me) and I'd prefer to
  send those separately"

* 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (39 commits)
  proc: switch auxv to use of __mem_open()
  hpfs: support FIEMAP
  cifs: get rid of unused arguments of CIFSSMBWrite()
  posix_acl: uapi header split
  posix_acl: xattr representation cleanups
  fs/aio.c: eliminate redundant loads in put_aio_ring_file
  fs/internal.h: add const to ns_dentry_operations declaration
  compat: remove compat_printk()
  fs/buffer.c: make __getblk_slow() static
  proc: unsigned file descriptors
  fs/file: more unsigned file descriptors
  fs: compat: remove redundant check of nr_segs
  cachefiles: Fix attempt to read i_blocks after deleting file [ver #2]
  cifs: don't use memcpy() to copy struct iov_iter
  get rid of separate multipage fault-in primitives
  fs: Avoid premature clearing of capabilities
  fs: Give dentry to inode_change_ok() instead of inode
  fuse: Propagate dentry down to inode_change_ok()
  ceph: Propagate dentry down to inode_change_ok()
  xfs: Propagate dentry down to inode_change_ok()
  ...

35 files changed:
1  2 
drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/llite/llite_lib.c
drivers/staging/lustre/lustre/ptlrpc/wiretest.c
fs/btrfs/inode.c
fs/ext2/inode.c
fs/ext4/inode.c
fs/f2fs/acl.c
fs/f2fs/file.c
fs/f2fs/node.c
fs/fuse/dir.c
fs/fuse/file.c
fs/fuse/fuse_i.h
fs/gfs2/inode.c
fs/hugetlbfs/inode.c
fs/internal.h
fs/locks.c
fs/namespace.c
fs/ocfs2/dlmfs/dlmfs.c
fs/ocfs2/file.c
fs/orangefs/file.c
fs/orangefs/namei.c
fs/orangefs/orangefs-debugfs.c
fs/proc/base.c
fs/proc/generic.c
fs/proc/proc_sysctl.c
fs/xfs/xfs_file.c
include/linux/fs.h
include/linux/pagemap.h
include/linux/uio.h
include/uapi/linux/Kbuild
kernel/sysctl.c
lib/iov_iter.c
mm/shmem.c

@@@ -504,28 -504,6 +504,28 @@@ static inline char *iwl_dbgfs_is_match(
        return !strncmp(name, buf, len) ? buf + len : NULL;
  }
  
 +static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file,
 +                                               char __user *user_buf,
 +                                               size_t count, loff_t *ppos)
 +{
 +      struct ieee80211_vif *vif = file->private_data;
 +      struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 +      struct iwl_mvm *mvm = mvmvif->mvm;
 +      u32 curr_gp2;
 +      u64 curr_os;
 +      s64 diff;
 +      char buf[64];
 +      const size_t bufsz = sizeof(buf);
 +      int pos = 0;
 +
 +      iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os);
 +      do_div(curr_os, NSEC_PER_USEC);
 +      diff = curr_os - curr_gp2;
 +      pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff);
 +
 +      return simple_read_from_buffer(user_buf, count, ppos, buf, pos);
 +}
 +
  static ssize_t iwl_dbgfs_tof_enable_write(struct ieee80211_vif *vif,
                                          char *buf,
                                          size_t count, loff_t *ppos)
@@@ -1552,8 -1530,6 +1552,8 @@@ MVM_DEBUGFS_READ_FILE_OPS(tof_range_res
  MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32);
  MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32);
  MVM_DEBUGFS_WRITE_FILE_OPS(lqm_send_cmd, 64);
 +MVM_DEBUGFS_READ_FILE_OPS(os_device_timediff);
 +
  
  void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
  {
        mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir);
  
        if (!mvmvif->dbgfs_dir) {
-               IWL_ERR(mvm, "Failed to create debugfs directory under %s\n",
-                       dbgfs_dir->d_name.name);
+               IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n",
+                       dbgfs_dir);
                return;
        }
  
        if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM &&
            ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) ||
 -           (vif->type == NL80211_IFTYPE_STATION && vif->p2p &&
 -            mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)))
 +           (vif->type == NL80211_IFTYPE_STATION && vif->p2p)))
                MVM_DEBUGFS_ADD_FILE_VIF(pm_params, mvmvif->dbgfs_dir, S_IWUSR |
                                         S_IRUSR);
  
        MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir,
                                 S_IRUSR | S_IWUSR);
        MVM_DEBUGFS_ADD_FILE_VIF(lqm_send_cmd, mvmvif->dbgfs_dir, S_IWUSR);
 +      MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff,
 +                               mvmvif->dbgfs_dir, S_IRUSR);
  
        if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p &&
            mvmvif == mvm->bf_allowed_vif)
         * find
         * netdev:wlan0 -> ../../../ieee80211/phy0/netdev:wlan0/iwlmvm/
         */
-       snprintf(buf, 100, "../../../%s/%s/%s/%s",
-                dbgfs_dir->d_parent->d_parent->d_name.name,
-                dbgfs_dir->d_parent->d_name.name,
-                dbgfs_dir->d_name.name,
-                mvmvif->dbgfs_dir->d_name.name);
+       snprintf(buf, 100, "../../../%pd3/%pd",
+                dbgfs_dir,
+                mvmvif->dbgfs_dir);
  
        mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name,
                                                     mvm->debugfs_dir, buf);
        if (!mvmvif->dbgfs_slink)
-               IWL_ERR(mvm, "Can't create debugfs symbolic link under %s\n",
-                       dbgfs_dir->d_name.name);
+               IWL_ERR(mvm, "Can't create debugfs symbolic link under %pd\n",
+                       dbgfs_dir);
        return;
  err:
        IWL_ERR(mvm, "Can't create debugfs entity\n");
@@@ -917,59 -917,6 +917,59 @@@ static ssize_t iwl_dbgfs_indirection_tb
        return ret ?: count;
  }
  
 +static ssize_t iwl_dbgfs_inject_packet_write(struct iwl_mvm *mvm,
 +                                           char *buf, size_t count,
 +                                           loff_t *ppos)
 +{
 +      struct iwl_rx_cmd_buffer rxb = {
 +              ._rx_page_order = 0,
 +              .truesize = 0, /* not used */
 +              ._offset = 0,
 +      };
 +      struct iwl_rx_packet *pkt;
 +      struct iwl_rx_mpdu_desc *desc;
 +      int bin_len = count / 2;
 +      int ret = -EINVAL;
 +
 +      /* supporting only 9000 descriptor */
 +      if (!mvm->trans->cfg->mq_rx_supported)
 +              return -ENOTSUPP;
 +
 +      rxb._page = alloc_pages(GFP_ATOMIC, 0);
 +      if (!rxb._page)
 +              return -ENOMEM;
 +      pkt = rxb_addr(&rxb);
 +
 +      ret = hex2bin(page_address(rxb._page), buf, bin_len);
 +      if (ret)
 +              goto out;
 +
 +      /* avoid invalid memory access */
 +      if (bin_len < sizeof(*pkt) + sizeof(*desc))
 +              goto out;
 +
 +      /* check this is RX packet */
 +      if (WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd) !=
 +          WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))
 +              goto out;
 +
 +      /* check the length in metadata matches actual received length */
 +      desc = (void *)pkt->data;
 +      if (le16_to_cpu(desc->mpdu_len) !=
 +          (bin_len - sizeof(*desc) - sizeof(*pkt)))
 +              goto out;
 +
 +      local_bh_disable();
 +      iwl_mvm_rx_mpdu_mq(mvm, NULL, &rxb, 0);
 +      local_bh_enable();
 +      ret = 0;
 +
 +out:
 +      iwl_free_rxb(&rxb);
 +
 +      return ret ?: count;
 +}
 +
  static ssize_t iwl_dbgfs_fw_dbg_conf_read(struct file *file,
                                          char __user *user_buf,
                                          size_t count, loff_t *ppos)
@@@ -1507,7 -1454,6 +1507,7 @@@ MVM_DEBUGFS_WRITE_FILE_OPS(cont_recordi
  MVM_DEBUGFS_WRITE_FILE_OPS(max_amsdu_len, 8);
  MVM_DEBUGFS_WRITE_FILE_OPS(indirection_tbl,
                           (IWL_RSS_INDIRECTION_TABLE_SIZE * 2));
 +MVM_DEBUGFS_WRITE_FILE_OPS(inject_packet, 512);
  
  #ifdef CONFIG_IWLWIFI_BCAST_FILTERING
  MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters, 256);
@@@ -1518,132 -1464,6 +1518,132 @@@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_f
  MVM_DEBUGFS_READ_WRITE_FILE_OPS(d3_sram, 8);
  #endif
  
 +static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf,
 +                                size_t count, loff_t *ppos)
 +{
 +      struct iwl_mvm *mvm = file->private_data;
 +      struct iwl_dbg_mem_access_cmd cmd = {};
 +      struct iwl_dbg_mem_access_rsp *rsp;
 +      struct iwl_host_cmd hcmd = {
 +              .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
 +              .data = { &cmd, },
 +              .len = { sizeof(cmd) },
 +      };
 +      size_t delta, len;
 +      ssize_t ret;
 +
 +      hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR,
 +                           DEBUG_GROUP, 0);
 +      cmd.op = cpu_to_le32(DEBUG_MEM_OP_READ);
 +
 +      /* Take care of alignment of both the position and the length */
 +      delta = *ppos & 0x3;
 +      cmd.addr = cpu_to_le32(*ppos - delta);
 +      cmd.len = cpu_to_le32(min(ALIGN(count + delta, 4) / 4,
 +                                (size_t)DEBUG_MEM_MAX_SIZE_DWORDS));
 +
 +      mutex_lock(&mvm->mutex);
 +      ret = iwl_mvm_send_cmd(mvm, &hcmd);
 +      mutex_unlock(&mvm->mutex);
 +
 +      if (ret < 0)
 +              return ret;
 +
 +      rsp = (void *)hcmd.resp_pkt->data;
 +      if (le32_to_cpu(rsp->status) != DEBUG_MEM_STATUS_SUCCESS) {
 +              ret = -ENXIO;
 +              goto out;
 +      }
 +
 +      len = min((size_t)le32_to_cpu(rsp->len) << 2,
 +                iwl_rx_packet_payload_len(hcmd.resp_pkt) - sizeof(*rsp));
 +      len = min(len - delta, count);
 +      if (len < 0) {
 +              ret = -EFAULT;
 +              goto out;
 +      }
 +
 +      ret = len - copy_to_user(user_buf, (void *)rsp->data + delta, len);
 +      *ppos += ret;
 +
 +out:
 +      iwl_free_resp(&hcmd);
 +      return ret;
 +}
 +
 +static ssize_t iwl_dbgfs_mem_write(struct file *file,
 +                                 const char __user *user_buf, size_t count,
 +                                 loff_t *ppos)
 +{
 +      struct iwl_mvm *mvm = file->private_data;
 +      struct iwl_dbg_mem_access_cmd *cmd;
 +      struct iwl_dbg_mem_access_rsp *rsp;
 +      struct iwl_host_cmd hcmd = {};
 +      size_t cmd_size;
 +      size_t data_size;
 +      u32 op, len;
 +      ssize_t ret;
 +
 +      hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR,
 +                           DEBUG_GROUP, 0);
 +
 +      if (*ppos & 0x3 || count < 4) {
 +              op = DEBUG_MEM_OP_WRITE_BYTES;
 +              len = min(count, (size_t)(4 - (*ppos & 0x3)));
 +              data_size = len;
 +      } else {
 +              op = DEBUG_MEM_OP_WRITE;
 +              len = min(count >> 2, (size_t)DEBUG_MEM_MAX_SIZE_DWORDS);
 +              data_size = len << 2;
 +      }
 +
 +      cmd_size = sizeof(*cmd) + ALIGN(data_size, 4);
 +      cmd = kzalloc(cmd_size, GFP_KERNEL);
 +      if (!cmd)
 +              return -ENOMEM;
 +
 +      cmd->op = cpu_to_le32(op);
 +      cmd->len = cpu_to_le32(len);
 +      cmd->addr = cpu_to_le32(*ppos);
 +      if (copy_from_user((void *)cmd->data, user_buf, data_size)) {
 +              kfree(cmd);
 +              return -EFAULT;
 +      }
 +
 +      hcmd.flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL,
 +      hcmd.data[0] = (void *)cmd;
 +      hcmd.len[0] = cmd_size;
 +
 +      mutex_lock(&mvm->mutex);
 +      ret = iwl_mvm_send_cmd(mvm, &hcmd);
 +      mutex_unlock(&mvm->mutex);
 +
 +      kfree(cmd);
 +
 +      if (ret < 0)
 +              return ret;
 +
 +      rsp = (void *)hcmd.resp_pkt->data;
 +      if (rsp->status != DEBUG_MEM_STATUS_SUCCESS) {
 +              ret = -ENXIO;
 +              goto out;
 +      }
 +
 +      ret = data_size;
 +      *ppos += ret;
 +
 +out:
 +      iwl_free_resp(&hcmd);
 +      return ret;
 +}
 +
 +static const struct file_operations iwl_dbgfs_mem_ops = {
 +      .read = iwl_dbgfs_mem_read,
 +      .write = iwl_dbgfs_mem_write,
 +      .open = simple_open,
 +      .llseek = default_llseek,
 +};
 +
  int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir)
  {
        struct dentry *bcast_dir __maybe_unused;
        MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, S_IWUSR);
        MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, S_IWUSR);
        MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, S_IWUSR);
 +      MVM_DEBUGFS_ADD_FILE(inject_packet, mvm->debugfs_dir, S_IWUSR);
        if (!debugfs_create_bool("enable_scan_iteration_notif",
                                 S_IRUSR | S_IWUSR,
                                 mvm->debugfs_dir,
                                 mvm->debugfs_dir, &mvm->nvm_phy_sku_blob))
                goto err;
  
 +      debugfs_create_file("mem", S_IRUSR | S_IWUSR, dbgfs_dir, mvm,
 +                          &iwl_dbgfs_mem_ops);
 +
        /*
         * Create a symlink with mac80211. It will be removed when mac80211
         * exists (before the opmode exists which removes the target.)
         */
-       snprintf(buf, 100, "../../%s/%s",
-                dbgfs_dir->d_parent->d_parent->d_name.name,
-                dbgfs_dir->d_parent->d_name.name);
+       snprintf(buf, 100, "../../%pd2", dbgfs_dir->d_parent);
        if (!debugfs_create_symlink("iwlwifi", mvm->hw->wiphy->debugfsdir, buf))
                goto err;
  
  
  #define DEBUG_SUBSYSTEM S_LLITE
  #include "../include/lustre_dlm.h"
 -#include "../include/lustre_lite.h"
  #include <linux/pagemap.h>
  #include <linux/file.h>
 +#include <linux/sched.h>
  #include <linux/mount.h>
 -#include "llite_internal.h"
  #include "../include/lustre/ll_fiemap.h"
 +#include "../include/lustre/lustre_ioctl.h"
  
  #include "../include/cl_object.h"
 +#include "llite_internal.h"
  
  static int
  ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
@@@ -189,11 -188,17 +189,11 @@@ static int ll_close_inode_openhandle(st
                spin_unlock(&lli->lli_lock);
        }
  
 -      if (rc == 0) {
 -              rc = ll_objects_destroy(req, inode);
 -              if (rc)
 -                      CERROR("inode %lu ll_objects destroy: rc = %d\n",
 -                             inode->i_ino, rc);
 -      }
        if (rc == 0 && op_data->op_bias & MDS_HSM_RELEASE) {
                struct mdt_body *body;
  
                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
 -              if (!(body->valid & OBD_MD_FLRELEASED))
 +              if (!(body->mbo_valid & OBD_MD_FLRELEASED))
                        rc = -EBUSY;
        }
  
@@@ -344,11 -349,13 +344,11 @@@ int ll_file_release(struct inode *inode
        fd = LUSTRE_FPRIVATE(file);
        LASSERT(fd);
  
 -      /* The last ref on @file, maybe not be the owner pid of statahead.
 -       * Different processes can open the same dir, "ll_opendir_key" means:
 -       * it is me that should stop the statahead thread.
 +      /* The last ref on @file, maybe not be the owner pid of statahead,
 +       * because parent and child process can share the same file handle.
         */
 -      if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd &&
 -          lli->lli_opendir_pid != 0)
 -              ll_stop_statahead(inode, lli->lli_opendir_key);
 +      if (S_ISDIR(inode->i_mode) && lli->lli_opendir_key == fd)
 +              ll_deauthorize_statahead(inode, fd);
  
        if (is_root_inode(inode)) {
                LUSTRE_FPRIVATE(file) = NULL;
        }
  
        if (!S_ISDIR(inode->i_mode)) {
 -              lov_read_and_clear_async_rc(lli->lli_clob);
 +              if (lli->lli_clob)
 +                      lov_read_and_clear_async_rc(lli->lli_clob);
                lli->lli_async_rc = 0;
        }
  
        return rc;
  }
  
 -static int ll_intent_file_open(struct dentry *dentry, void *lmm,
 -                             int lmmsize, struct lookup_intent *itp)
 +static int ll_intent_file_open(struct dentry *de, void *lmm, int lmmsize,
 +                             struct lookup_intent *itp)
  {
 -      struct inode *inode = d_inode(dentry);
 +      struct inode *inode = d_inode(de);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
 -      struct dentry *parent = dentry->d_parent;
 -      const char *name = dentry->d_name.name;
 -      const int len = dentry->d_name.len;
 +      struct dentry *parent = de->d_parent;
 +      const char *name = NULL;
        struct md_op_data *op_data;
 -      struct ptlrpc_request *req;
 -      __u32 opc = LUSTRE_OPC_ANY;
 -      int rc;
 +      struct ptlrpc_request *req = NULL;
 +      int len = 0, rc;
  
 -      /* Usually we come here only for NFSD, and we want open lock. */
 -      /* We can also get here if there was cached open handle in revalidate_it
 -       * but it disappeared while we were getting from there to ll_file_open.
 -       * But this means this file was closed and immediately opened which
 -       * makes a good candidate for using OPEN lock
 -       */
 -      /* If lmmsize & lmm are not 0, we are just setting stripe info
 -       * parameters. No need for the open lock
 +      LASSERT(parent);
 +      LASSERT(itp->it_flags & MDS_OPEN_BY_FID);
 +
 +      /*
 +       * if server supports open-by-fid, or file name is invalid, don't pack
 +       * name in open request
         */
 -      if (!lmm && lmmsize == 0) {
 -              struct ll_dentry_data *ldd = ll_d2d(dentry);
 -              /*
 -               * If we came via ll_iget_for_nfs, then we need to request
 -               * struct ll_dentry_data *ldd = ll_d2d(file->f_dentry);
 -               *
 -               * NB: when ldd is NULL, it must have come via normal
 -               * lookup path only, since ll_iget_for_nfs always calls
 -               * ll_d_init().
 -               */
 -              if (ldd && ldd->lld_nfs_dentry) {
 -                      ldd->lld_nfs_dentry = 0;
 -                      itp->it_flags |= MDS_OPEN_LOCK;
 -              }
 -              if (itp->it_flags & FMODE_WRITE)
 -                      opc = LUSTRE_OPC_CREATE;
 +      if (!(exp_connect_flags(sbi->ll_md_exp) & OBD_CONNECT_OPEN_BY_FID) &&
 +          lu_name_is_valid_2(de->d_name.name, de->d_name.len)) {
 +              name = de->d_name.name;
 +              len = de->d_name.len;
        }
  
 -      op_data  = ll_prep_md_op_data(NULL, d_inode(parent),
 -                                    inode, name, len,
 -                                    O_RDWR, opc, NULL);
 +      op_data  = ll_prep_md_op_data(NULL, d_inode(parent), inode, name, len,
 +                                    O_RDWR, LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                return PTR_ERR(op_data);
 +      op_data->op_data = lmm;
 +      op_data->op_data_size = lmmsize;
  
 -      itp->it_flags |= MDS_OPEN_BY_FID;
 -      rc = md_intent_lock(sbi->ll_md_exp, op_data, lmm, lmmsize, itp,
 -                          0 /*unused */, &req, ll_md_blocking_ast, 0);
 +      rc = md_intent_lock(sbi->ll_md_exp, op_data, itp, &req,
 +                          &ll_md_blocking_ast, 0);
        ll_finish_md_op_data(op_data);
        if (rc == -ESTALE) {
                /* reason for keep own exit path - don`t flood log
@@@ -457,8 -479,8 +457,8 @@@ static int ll_och_fill(struct obd_expor
        struct mdt_body *body;
  
        body = req_capsule_server_get(&it->it_request->rq_pill, &RMF_MDT_BODY);
 -      och->och_fh = body->handle;
 -      och->och_fid = body->fid1;
 +      och->och_fh = body->mbo_handle;
 +      och->och_fid = body->mbo_fid1;
        och->och_lease_handle.cookie = it->it_lock_handle;
        och->och_magic = OBD_CLIENT_HANDLE_MAGIC;
        och->och_flags = it->it_flags;
@@@ -486,7 -508,7 +486,7 @@@ static int ll_local_open(struct file *f
  
                body = req_capsule_server_get(&it->it_request->rq_pill,
                                              &RMF_MDT_BODY);
 -              ll_ioepoch_open(lli, body->ioepoch);
 +              ll_ioepoch_open(lli, body->mbo_ioepoch);
        }
  
        LUSTRE_FPRIVATE(file) = fd;
@@@ -521,7 -543,7 +521,7 @@@ int ll_file_open(struct inode *inode, s
        struct obd_client_handle **och_p = NULL;
        __u64 *och_usecount = NULL;
        struct ll_file_data *fd;
 -      int rc = 0, opendir_set = 0;
 +      int rc = 0;
  
        CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), flags %o\n",
               PFID(ll_inode2fid(inode)), inode, file->f_flags);
        }
  
        fd->fd_file = file;
 -      if (S_ISDIR(inode->i_mode)) {
 -              spin_lock(&lli->lli_sa_lock);
 -              if (!lli->lli_opendir_key && !lli->lli_sai &&
 -                  lli->lli_opendir_pid == 0) {
 -                      lli->lli_opendir_key = fd;
 -                      lli->lli_opendir_pid = current_pid();
 -                      opendir_set = 1;
 -              }
 -              spin_unlock(&lli->lli_sa_lock);
 -      }
 +      if (S_ISDIR(inode->i_mode))
 +              ll_authorize_statahead(inode, fd);
  
        if (is_root_inode(inode)) {
                LUSTRE_FPRIVATE(file) = fd;
@@@ -585,7 -615,7 +585,7 @@@ restart
        } else if (it->it_flags & FMODE_EXEC) {
                och_p = &lli->lli_mds_exec_och;
                och_usecount = &lli->lli_open_fd_exec_count;
 -       } else {
 +      } else {
                och_p = &lli->lli_mds_read_och;
                och_usecount = &lli->lli_open_fd_read_count;
        }
                         * result in a deadlock
                         */
                        mutex_unlock(&lli->lli_och_mutex);
 -                      it->it_create_mode |= M_CHECK_STALE;
 +                      /*
 +                       * Normally called under two situations:
 +                       * 1. NFS export.
 +                       * 2. revalidate with IT_OPEN (revalidate doesn't
 +                       *    execute this intent any more).
 +                       *
 +                       * Always fetch MDS_OPEN_LOCK if this is not setstripe.
 +                       *
 +                       * Always specify MDS_OPEN_BY_FID because we don't want
 +                       * to get file with different fid.
 +                       */
 +                      it->it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID;
                        rc = ll_intent_file_open(file->f_path.dentry, NULL, 0, it);
 -                      it->it_create_mode &= ~M_CHECK_STALE;
                        if (rc)
                                goto out_openerr;
  
@@@ -696,10 -716,9 +696,10 @@@ out_och_free
                mutex_unlock(&lli->lli_och_mutex);
  
  out_openerr:
 -              if (opendir_set != 0)
 -                      ll_stop_statahead(inode, lli->lli_opendir_key);
 -              ll_file_data_put(fd);
 +              if (lli->lli_opendir_key == fd)
 +                      ll_deauthorize_statahead(inode, fd);
 +              if (fd)
 +                      ll_file_data_put(fd);
        } else {
                ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_OPEN, 1);
        }
@@@ -745,7 -764,7 +745,7 @@@ ll_lease_open(struct inode *inode, stru
        struct lookup_intent it = { .it_op = IT_OPEN };
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct md_op_data *op_data;
 -      struct ptlrpc_request *req;
 +      struct ptlrpc_request *req = NULL;
        struct lustre_handle old_handle = { 0 };
        struct obd_client_handle *och = NULL;
        int rc;
  
        it.it_flags = fmode | open_flags;
        it.it_flags |= MDS_OPEN_LOCK | MDS_OPEN_BY_FID | MDS_OPEN_LEASE;
 -      rc = md_intent_lock(sbi->ll_md_exp, op_data, NULL, 0, &it, 0, &req,
 -                          ll_md_blocking_lease_ast,
 +      rc = md_intent_lock(sbi->ll_md_exp, op_data, &it, &req,
 +                          &ll_md_blocking_lease_ast,
        /* LDLM_FL_NO_LRU: To not put the lease lock into LRU list, otherwise
         * it can be cancelled which may mislead applications that the lease is
         * broken;
         * open in ll_md_blocking_ast(). Otherwise as ll_md_blocking_lease_ast
         * doesn't deal with openhandle, so normal openhandle will be leaked.
         */
 -                              LDLM_FL_NO_LRU | LDLM_FL_EXCL);
 +                          LDLM_FL_NO_LRU | LDLM_FL_EXCL);
        ll_finish_md_op_data(op_data);
        ptlrpc_req_finished(req);
        if (rc < 0)
@@@ -889,6 -908,7 +889,6 @@@ static int ll_lease_close(struct obd_cl
  {
        struct ldlm_lock *lock;
        bool cancelled = true;
 -      int rc;
  
        lock = ldlm_handle2lock(&och->och_lease_handle);
        if (lock) {
        if (lease_broken)
                *lease_broken = cancelled;
  
 -      rc = ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp, inode, och,
 -                                     NULL);
 -      return rc;
 +      return ll_close_inode_openhandle(ll_i2sbi(inode)->ll_md_exp,
 +                                       inode, och, NULL);
  }
  
  /* Fills the obdo with the attributes for the lsm */
@@@ -1117,12 -1138,11 +1117,12 @@@ ll_file_io_generic(const struct lu_env 
  {
        struct ll_inode_info *lli = ll_i2info(file_inode(file));
        struct ll_file_data  *fd  = LUSTRE_FPRIVATE(file);
 +      struct range_lock range;
        struct cl_io     *io;
        ssize_t        result;
  
-       CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zu\n",
-              file->f_path.dentry->d_name.name, iot, *ppos, count);
 -      CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zd\n",
++      CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n",
+              file, iot, *ppos, count);
  
  restart:
        io = vvp_env_thread_io(env);
  
        if (cl_io_rw_init(env, io, iot, *ppos, count) == 0) {
                struct vvp_io *vio = vvp_env_io(env);
 -              int write_mutex_locked = 0;
 +              bool range_locked = false;
 +
 +              if (file->f_flags & O_APPEND)
 +                      range_lock_init(&range, 0, LUSTRE_EOF);
 +              else
 +                      range_lock_init(&range, *ppos, *ppos + count - 1);
  
                vio->vui_fd  = LUSTRE_FPRIVATE(file);
 -              vio->vui_io_subtype = args->via_io_subtype;
 -
 -              switch (vio->vui_io_subtype) {
 -              case IO_NORMAL:
 -                      vio->vui_iter = args->u.normal.via_iter;
 -                      vio->vui_iocb = args->u.normal.via_iocb;
 -                      if ((iot == CIT_WRITE) &&
 -                          !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
 -                              if (mutex_lock_interruptible(&lli->
 -                                                             lli_write_mutex)) {
 -                                      result = -ERESTARTSYS;
 -                                      goto out;
 -                              }
 -                              write_mutex_locked = 1;
 -                      }
 -                      down_read(&lli->lli_trunc_sem);
 -                      break;
 -              case IO_SPLICE:
 -                      vio->u.splice.vui_pipe = args->u.splice.via_pipe;
 -                      vio->u.splice.vui_flags = args->u.splice.via_flags;
 -                      break;
 -              default:
 -                      CERROR("Unknown IO type - %u\n", vio->vui_io_subtype);
 -                      LBUG();
 +              vio->vui_iter = args->u.normal.via_iter;
 +              vio->vui_iocb = args->u.normal.via_iocb;
 +              /*
 +               * Direct IO reads must also take range lock,
 +               * or multiple reads will try to work on the same pages
 +               * See LU-6227 for details.
 +               */
 +              if (((iot == CIT_WRITE) ||
 +                   (iot == CIT_READ && (file->f_flags & O_DIRECT))) &&
 +                  !(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
 +                      CDEBUG(D_VFSTRACE, "Range lock [%llu, %llu]\n",
 +                             range.rl_node.in_extent.start,
 +                             range.rl_node.in_extent.end);
 +                      result = range_lock(&lli->lli_write_tree,
 +                                          &range);
 +                      if (result < 0)
 +                              goto out;
 +
 +                      range_locked = true;
                }
 +              down_read(&lli->lli_trunc_sem);
                ll_cl_add(file, env, io);
                result = cl_io_loop(env, io);
                ll_cl_remove(file, env);
 -              if (args->via_io_subtype == IO_NORMAL)
 -                      up_read(&lli->lli_trunc_sem);
 -              if (write_mutex_locked)
 -                      mutex_unlock(&lli->lli_write_mutex);
 +              up_read(&lli->lli_trunc_sem);
 +              if (range_locked) {
 +                      CDEBUG(D_VFSTRACE, "Range unlock [%llu, %llu]\n",
 +                             range.rl_node.in_extent.start,
 +                             range.rl_node.in_extent.end);
 +                      range_unlock(&lli->lli_write_tree, &range);
 +              }
        } else {
                /* cl_io_rw_init() handled IO */
                result = io->ci_result;
@@@ -1185,7 -1201,7 +1185,7 @@@ out
         * short read/write instead of restart io.
         */
        if ((result == 0 || result == -ENODATA) && io->ci_need_restart) {
 -              CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zd\n",
 +              CDEBUG(D_VFSTRACE, "Restart %s on %pD from %lld, count:%zu\n",
                       iot == CIT_READ ? "read" : "write",
                       file, *ppos, count);
                LASSERTF(io->ci_nob == 0, "%zd\n", io->ci_nob);
@@@ -1221,7 -1237,7 +1221,7 @@@ static ssize_t ll_file_read_iter(struc
        if (IS_ERR(env))
                return PTR_ERR(env);
  
 -      args = ll_env_args(env, IO_NORMAL);
 +      args = ll_env_args(env);
        args->u.normal.via_iter = to;
        args->u.normal.via_iocb = iocb;
  
@@@ -1245,7 -1261,7 +1245,7 @@@ static ssize_t ll_file_write_iter(struc
        if (IS_ERR(env))
                return PTR_ERR(env);
  
 -      args = ll_env_args(env, IO_NORMAL);
 +      args = ll_env_args(env);
        args->u.normal.via_iter = from;
        args->u.normal.via_iocb = iocb;
  
        return result;
  }
  
 -/*
 - * Send file content (through pagecache) somewhere with helper
 - */
 -static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
 -                                 struct pipe_inode_info *pipe, size_t count,
 -                                 unsigned int flags)
 -{
 -      struct lu_env      *env;
 -      struct vvp_io_args *args;
 -      ssize_t      result;
 -      int              refcheck;
 -
 -      env = cl_env_get(&refcheck);
 -      if (IS_ERR(env))
 -              return PTR_ERR(env);
 -
 -      args = ll_env_args(env, IO_SPLICE);
 -      args->u.splice.via_pipe = pipe;
 -      args->u.splice.via_flags = flags;
 -
 -      result = ll_file_io_generic(env, args, in_file, CIT_READ, ppos, count);
 -      cl_env_put(env, &refcheck);
 -      return result;
 -}
 -
 -static int ll_lov_recreate(struct inode *inode, struct ost_id *oi, u32 ost_idx)
 -{
 -      struct obd_export *exp = ll_i2dtexp(inode);
 -      struct obd_trans_info oti = { 0 };
 -      struct obdo *oa = NULL;
 -      int lsm_size;
 -      int rc = 0;
 -      struct lov_stripe_md *lsm = NULL, *lsm2;
 -
 -      oa = kmem_cache_zalloc(obdo_cachep, GFP_NOFS);
 -      if (!oa)
 -              return -ENOMEM;
 -
 -      lsm = ccc_inode_lsm_get(inode);
 -      if (!lsm_has_objects(lsm)) {
 -              rc = -ENOENT;
 -              goto out;
 -      }
 -
 -      lsm_size = sizeof(*lsm) + (sizeof(struct lov_oinfo) *
 -                 (lsm->lsm_stripe_count));
 -
 -      lsm2 = libcfs_kvzalloc(lsm_size, GFP_NOFS);
 -      if (!lsm2) {
 -              rc = -ENOMEM;
 -              goto out;
 -      }
 -
 -      oa->o_oi = *oi;
 -      oa->o_nlink = ost_idx;
 -      oa->o_flags |= OBD_FL_RECREATE_OBJS;
 -      oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
 -      obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
 -                                 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
 -      obdo_set_parent_fid(oa, &ll_i2info(inode)->lli_fid);
 -      memcpy(lsm2, lsm, lsm_size);
 -      ll_inode_size_lock(inode);
 -      rc = obd_create(NULL, exp, oa, &lsm2, &oti);
 -      ll_inode_size_unlock(inode);
 -
 -      kvfree(lsm2);
 -      goto out;
 -out:
 -      ccc_inode_lsm_put(inode, lsm);
 -      kmem_cache_free(obdo_cachep, oa);
 -      return rc;
 -}
 -
 -static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
 -{
 -      struct ll_recreate_obj ucreat;
 -      struct ost_id           oi;
 -
 -      if (!capable(CFS_CAP_SYS_ADMIN))
 -              return -EPERM;
 -
 -      if (copy_from_user(&ucreat, (struct ll_recreate_obj __user *)arg,
 -                         sizeof(ucreat)))
 -              return -EFAULT;
 -
 -      ostid_set_seq_mdt0(&oi);
 -      ostid_set_id(&oi, ucreat.lrc_id);
 -      return ll_lov_recreate(inode, &oi, ucreat.lrc_ost_idx);
 -}
 -
 -static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
 -{
 -      struct lu_fid   fid;
 -      struct ost_id   oi;
 -      u32             ost_idx;
 -
 -      if (!capable(CFS_CAP_SYS_ADMIN))
 -              return -EPERM;
 -
 -      if (copy_from_user(&fid, (struct lu_fid __user *)arg, sizeof(fid)))
 -              return -EFAULT;
 -
 -      fid_to_ostid(&fid, &oi);
 -      ost_idx = (fid_seq(&fid) >> 16) & 0xffff;
 -      return ll_lov_recreate(inode, &oi, ost_idx);
 -}
 -
  int ll_lov_setstripe_ea_info(struct inode *inode, struct dentry *dentry,
                             __u64 flags, struct lov_user_md *lum,
                             int lum_size)
  {
        struct lov_stripe_md *lsm = NULL;
 -      struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
 +      struct lookup_intent oit = {
 +              .it_op = IT_OPEN,
 +              .it_flags = flags | MDS_OPEN_BY_FID,
 +      };
        int rc = 0;
  
        lsm = ccc_inode_lsm_get(inode);
  
        ll_inode_size_lock(inode);
        rc = ll_intent_file_open(dentry, lum, lum_size, &oit);
 -      if (rc)
 +      if (rc < 0)
                goto out_unlock;
        rc = oit.it_status;
        if (rc < 0)
 -              goto out_req_free;
 +              goto out_unlock;
  
        ll_release_openhandle(inode, &oit);
  
@@@ -1291,6 -1411,9 +1291,6 @@@ out_unlock
        ccc_inode_lsm_put(inode, lsm);
  out:
        return rc;
 -out_req_free:
 -      ptlrpc_req_finished((struct ptlrpc_request *)oit.it_request);
 -      goto out;
  }
  
  int ll_lov_getstripe_ea_info(struct inode *inode, const char *filename,
  
        body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
  
 -      lmmsize = body->eadatasize;
 +      lmmsize = body->mbo_eadatasize;
  
 -      if (!(body->valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
 +      if (!(body->mbo_valid & (OBD_MD_FLEASIZE | OBD_MD_FLDIREA)) ||
            lmmsize == 0) {
                rc = -ENODATA;
                goto out;
                 */
                if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V1)) {
                        lustre_swab_lov_user_md_v1((struct lov_user_md_v1 *)lmm);
 -                      if (S_ISREG(body->mode))
 +                      if (S_ISREG(body->mbo_mode))
                                lustre_swab_lov_user_md_objects(
                                 ((struct lov_user_md_v1 *)lmm)->lmm_objects,
                                 stripe_count);
                } else if (lmm->lmm_magic == cpu_to_le32(LOV_MAGIC_V3)) {
                        lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)lmm);
 -                      if (S_ISREG(body->mode))
 +                      if (S_ISREG(body->mbo_mode))
                                lustre_swab_lov_user_md_objects(
                                 ((struct lov_user_md_v3 *)lmm)->lmm_objects,
                                 stripe_count);
@@@ -1407,48 -1530,55 +1407,48 @@@ static int ll_lov_setea(struct inode *i
        return rc;
  }
  
 +static int ll_file_getstripe(struct inode *inode,
 +                           struct lov_user_md __user *lum)
 +{
 +      struct lu_env *env;
 +      int refcheck;
 +      int rc;
 +
 +      env = cl_env_get(&refcheck);
 +      if (IS_ERR(env))
 +              return PTR_ERR(env);
 +
 +      rc = cl_object_getstripe(env, ll_i2info(inode)->lli_clob, lum);
 +      cl_env_put(env, &refcheck);
 +      return rc;
 +}
 +
  static int ll_lov_setstripe(struct inode *inode, struct file *file,
                            unsigned long arg)
  {
 -      struct lov_user_md_v3 lumv3;
 -      struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
 -      struct lov_user_md_v1 __user *lumv1p = (void __user *)arg;
 -      struct lov_user_md_v3 __user *lumv3p = (void __user *)arg;
 +      struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
 +      struct lov_user_md *klum;
        int lum_size, rc;
        __u64 flags = FMODE_WRITE;
  
 -      /* first try with v1 which is smaller than v3 */
 -      lum_size = sizeof(struct lov_user_md_v1);
 -      if (copy_from_user(lumv1, lumv1p, lum_size))
 -              return -EFAULT;
 -
 -      if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
 -              lum_size = sizeof(struct lov_user_md_v3);
 -              if (copy_from_user(&lumv3, lumv3p, lum_size))
 -                      return -EFAULT;
 -      }
 +      rc = ll_copy_user_md(lum, &klum);
 +      if (rc < 0)
 +              return rc;
  
 -      rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, lumv1,
 +      lum_size = rc;
 +      rc = ll_lov_setstripe_ea_info(inode, file->f_path.dentry, flags, klum,
                                      lum_size);
        cl_lov_delay_create_clear(&file->f_flags);
        if (rc == 0) {
 -              struct lov_stripe_md *lsm;
                __u32 gen;
  
 -              put_user(0, &lumv1p->lmm_stripe_count);
 +              put_user(0, &lum->lmm_stripe_count);
  
                ll_layout_refresh(inode, &gen);
 -              lsm = ccc_inode_lsm_get(inode);
 -              rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
 -                                 0, lsm, (void __user *)arg);
 -              ccc_inode_lsm_put(inode, lsm);
 +              rc = ll_file_getstripe(inode, (struct lov_user_md __user *)arg);
        }
 -      return rc;
 -}
  
 -static int ll_lov_getstripe(struct inode *inode, unsigned long arg)
 -{
 -      struct lov_stripe_md *lsm;
 -      int rc = -ENODATA;
 -
 -      lsm = ccc_inode_lsm_get(inode);
 -      if (lsm)
 -              rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode), 0,
 -                                 lsm, (void __user *)arg);
 -      ccc_inode_lsm_put(inode, lsm);
 +      kfree(klum);
        return rc;
  }
  
@@@ -2117,12 -2247,6 +2117,12 @@@ free_hss
        return rc;
  }
  
 +static inline long ll_lease_type_from_fmode(fmode_t fmode)
 +{
 +      return ((fmode & FMODE_READ) ? LL_LEASE_RDLCK : 0) |
 +             ((fmode & FMODE_WRITE) ? LL_LEASE_WRLCK : 0);
 +}
 +
  static long
  ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  {
                return rc;
        }
        case LL_IOC_LOV_GETSTRIPE:
 -              return ll_lov_getstripe(inode, arg);
 -      case LL_IOC_RECREATE_OBJ:
 -              return ll_lov_recreate_obj(inode, arg);
 -      case LL_IOC_RECREATE_FID:
 -              return ll_lov_recreate_fid(inode, arg);
 +              return ll_file_getstripe(inode,
 +                                       (struct lov_user_md __user *)arg);
        case FSFILT_IOC_FIEMAP:
                return ll_ioctl_fiemap(inode, arg);
        case FSFILT_IOC_GETFLAGS:
  
                return 0;
        }
 +      case LL_IOC_GETPARENT:
 +              return ll_getparent(file, (struct getparent __user *)arg);
        case OBD_IOC_FID2PATH:
                return ll_fid2path(inode, (void __user *)arg);
        case LL_IOC_DATA_VERSION: {
                struct ll_inode_info *lli = ll_i2info(inode);
                struct obd_client_handle *och = NULL;
                bool lease_broken;
 -              fmode_t mode = 0;
 +              fmode_t fmode;
  
                switch (arg) {
 -              case F_WRLCK:
 +              case LL_LEASE_WRLCK:
                        if (!(file->f_mode & FMODE_WRITE))
                                return -EPERM;
 -                      mode = FMODE_WRITE;
 +                      fmode = FMODE_WRITE;
                        break;
 -              case F_RDLCK:
 +              case LL_LEASE_RDLCK:
                        if (!(file->f_mode & FMODE_READ))
                                return -EPERM;
 -                      mode = FMODE_READ;
 +                      fmode = FMODE_READ;
                        break;
 -              case F_UNLCK:
 +              case LL_LEASE_UNLCK:
                        mutex_lock(&lli->lli_och_mutex);
                        if (fd->fd_lease_och) {
                                och = fd->fd_lease_och;
                        }
                        mutex_unlock(&lli->lli_och_mutex);
  
 -                      if (och) {
 -                              mode = och->och_flags &
 -                                     (FMODE_READ|FMODE_WRITE);
 -                              rc = ll_lease_close(och, inode, &lease_broken);
 -                              if (rc == 0 && lease_broken)
 -                                      mode = 0;
 -                      } else {
 -                              rc = -ENOLCK;
 -                      }
 +                      if (!och)
 +                              return -ENOLCK;
 +
 +                      fmode = och->och_flags;
 +                      rc = ll_lease_close(och, inode, &lease_broken);
 +                      if (rc < 0)
 +                              return rc;
 +
 +                      if (lease_broken)
 +                              fmode = 0;
  
 -                      /* return the type of lease or error */
 -                      return rc < 0 ? rc : (int)mode;
 +                      return ll_lease_type_from_fmode(fmode);
                default:
                        return -EINVAL;
                }
  
 -              CDEBUG(D_INODE, "Set lease with mode %d\n", mode);
 +              CDEBUG(D_INODE, "Set lease with mode %u\n", fmode);
  
                /* apply for lease */
 -              och = ll_lease_open(inode, file, mode, 0);
 +              och = ll_lease_open(inode, file, fmode, 0);
                if (IS_ERR(och))
                        return PTR_ERR(och);
  
        case LL_IOC_GET_LEASE: {
                struct ll_inode_info *lli = ll_i2info(inode);
                struct ldlm_lock *lock = NULL;
 +              fmode_t fmode = 0;
  
 -              rc = 0;
                mutex_lock(&lli->lli_och_mutex);
                if (fd->fd_lease_och) {
                        struct obd_client_handle *och = fd->fd_lease_och;
                        if (lock) {
                                lock_res_and_lock(lock);
                                if (!ldlm_is_cancel(lock))
 -                                      rc = och->och_flags &
 -                                              (FMODE_READ | FMODE_WRITE);
 +                                      fmode = och->och_flags;
                                unlock_res_and_lock(lock);
                                LDLM_LOCK_PUT(lock);
                        }
                }
                mutex_unlock(&lli->lli_och_mutex);
 -              return rc;
 +              return ll_lease_type_from_fmode(fmode);
        }
        case LL_IOC_HSM_IMPORT: {
                struct hsm_user_import *hui;
@@@ -2448,8 -2574,9 +2448,8 @@@ static loff_t ll_file_seek(struct file 
                eof = i_size_read(inode);
        }
  
 -      retval = generic_file_llseek_size(file, offset, origin,
 -                                        ll_file_maxbytes(inode), eof);
 -      return retval;
 +      return generic_file_llseek_size(file, offset, origin,
 +                                      ll_file_maxbytes(inode), eof);
  }
  
  static int ll_flush(struct file *file, fl_owner_t id)
         */
        rc = lli->lli_async_rc;
        lli->lli_async_rc = 0;
 -      err = lov_read_and_clear_async_rc(lli->lli_clob);
 -      if (rc == 0)
 -              rc = err;
 +      if (lli->lli_clob) {
 +              err = lov_read_and_clear_async_rc(lli->lli_clob);
 +              if (!rc)
 +                      rc = err;
 +      }
  
        /* The application has been told about write failure already.
         * Do not report failure again.
@@@ -2589,7 -2714,6 +2589,7 @@@ ll_file_flock(struct file *file, int cm
        struct md_op_data *op_data;
        struct lustre_handle lockh = {0};
        ldlm_policy_data_t flock = { {0} };
 +      int fl_type = file_lock->fl_type;
        __u64 flags = 0;
        int rc;
        int rc2 = 0;
        if (file_lock->fl_lmops && file_lock->fl_lmops->lm_compare_owner)
                flock.l_flock.owner = (unsigned long)file_lock->fl_pid;
  
 -      switch (file_lock->fl_type) {
 +      switch (fl_type) {
        case F_RDLCK:
                einfo.ei_mode = LCK_PR;
                break;
                einfo.ei_mode = LCK_PW;
                break;
        default:
 -              CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n",
 -                     file_lock->fl_type);
 +              CDEBUG(D_INFO, "Unknown fcntl lock type: %d\n", fl_type);
                return -ENOTSUPP;
        }
  
        case F_GETLK64:
  #endif
                flags = LDLM_FL_TEST_LOCK;
 -              /* Save the old mode so that if the mode in the lock changes we
 -               * can decrement the appropriate reader or writer refcount.
 -               */
 -              file_lock->fl_type = einfo.ei_mode;
                break;
        default:
                CERROR("unknown fcntl lock command: %d\n", cmd);
                return -EINVAL;
        }
  
 +      /*
 +       * Save the old mode so that if the mode in the lock changes we
 +       * can decrement the appropriate reader or writer refcount.
 +       */
 +      file_lock->fl_type = einfo.ei_mode;
 +
        op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
                                     LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
               PFID(ll_inode2fid(inode)), flock.l_flock.pid, flags,
               einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
  
 -      rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL,
 -                      op_data, &lockh, &flock, 0, NULL /* req */, flags);
 +      rc = md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data, &lockh,
 +                      flags);
 +
 +      /* Restore the file lock type if not TEST lock. */
 +      if (!(flags & LDLM_FL_TEST_LOCK))
 +              file_lock->fl_type = fl_type;
  
        if ((rc == 0 || file_lock->fl_type == F_UNLCK) &&
            !(flags & LDLM_FL_TEST_LOCK))
  
        if (rc2 && file_lock->fl_type != F_UNLCK) {
                einfo.ei_mode = LCK_NL;
 -              md_enqueue(sbi->ll_md_exp, &einfo, NULL,
 -                         op_data, &lockh, &flock, 0, NULL /* req */, flags);
 +              md_enqueue(sbi->ll_md_exp, &einfo, &flock, NULL, op_data,
 +                         &lockh, flags);
                rc = rc2;
        }
  
        return rc;
  }
  
 +int ll_get_fid_by_name(struct inode *parent, const char *name,
 +                     int namelen, struct lu_fid *fid)
 +{
 +      struct md_op_data *op_data = NULL;
 +      struct ptlrpc_request *req;
 +      struct mdt_body *body;
 +      int rc;
 +
 +      op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen, 0,
 +                                   LUSTRE_OPC_ANY, NULL);
 +      if (IS_ERR(op_data))
 +              return PTR_ERR(op_data);
 +
 +      op_data->op_valid = OBD_MD_FLID;
 +      rc = md_getattr_name(ll_i2sbi(parent)->ll_md_exp, op_data, &req);
 +      ll_finish_md_op_data(op_data);
 +      if (rc < 0)
 +              return rc;
 +
 +      body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
 +      if (!body) {
 +              rc = -EFAULT;
 +              goto out_req;
 +      }
 +      if (fid)
 +              *fid = body->mbo_fid1;
 +out_req:
 +      ptlrpc_req_finished(req);
 +      return rc;
 +}
 +
 +int ll_migrate(struct inode *parent, struct file *file, int mdtidx,
 +             const char *name, int namelen)
 +{
 +      struct ptlrpc_request *request = NULL;
 +      struct inode *child_inode = NULL;
 +      struct dentry *dchild = NULL;
 +      struct md_op_data *op_data;
 +      struct qstr qstr;
 +      int rc;
 +
 +      CDEBUG(D_VFSTRACE, "migrate %s under "DFID" to MDT%d\n",
 +             name, PFID(ll_inode2fid(parent)), mdtidx);
 +
 +      op_data = ll_prep_md_op_data(NULL, parent, NULL, name, namelen,
 +                                   0, LUSTRE_OPC_ANY, NULL);
 +      if (IS_ERR(op_data))
 +              return PTR_ERR(op_data);
 +
 +      /* Get child FID first */
 +      qstr.hash = full_name_hash(parent, name, namelen);
 +      qstr.name = name;
 +      qstr.len = namelen;
 +      dchild = d_lookup(file_dentry(file), &qstr);
 +      if (dchild) {
 +              op_data->op_fid3 = *ll_inode2fid(dchild->d_inode);
 +              if (dchild->d_inode) {
 +                      child_inode = igrab(dchild->d_inode);
 +                      if (child_inode) {
 +                              inode_lock(child_inode);
 +                              op_data->op_fid3 = *ll_inode2fid(child_inode);
 +                              ll_invalidate_aliases(child_inode);
 +                      }
 +              }
 +              dput(dchild);
 +      } else {
 +              rc = ll_get_fid_by_name(parent, name, namelen,
 +                                      &op_data->op_fid3);
 +              if (rc)
 +                      goto out_free;
 +      }
 +
 +      if (!fid_is_sane(&op_data->op_fid3)) {
 +              CERROR("%s: migrate %s, but fid "DFID" is insane\n",
 +                     ll_get_fsname(parent->i_sb, NULL, 0), name,
 +                     PFID(&op_data->op_fid3));
 +              rc = -EINVAL;
 +              goto out_free;
 +      }
 +
 +      rc = ll_get_mdt_idx_by_fid(ll_i2sbi(parent), &op_data->op_fid3);
 +      if (rc < 0)
 +              goto out_free;
 +
 +      if (rc == mdtidx) {
 +              CDEBUG(D_INFO, "%s:"DFID" is already on MDT%d.\n", name,
 +                     PFID(&op_data->op_fid3), mdtidx);
 +              rc = 0;
 +              goto out_free;
 +      }
 +
 +      op_data->op_mds = mdtidx;
 +      op_data->op_cli_flags = CLI_MIGRATE;
 +      rc = md_rename(ll_i2sbi(parent)->ll_md_exp, op_data, name,
 +                     namelen, name, namelen, &request);
 +      if (!rc)
 +              ll_update_times(request, parent);
 +
 +      ptlrpc_req_finished(request);
 +
 +out_free:
 +      if (child_inode) {
 +              clear_nlink(child_inode);
 +              inode_unlock(child_inode);
 +              iput(child_inode);
 +      }
 +
 +      ll_finish_md_op_data(op_data);
 +      return rc;
 +}
 +
  static int
  ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
  {
@@@ -2839,7 -2847,7 +2839,7 @@@ int ll_have_md_lock(struct inode *inode
        struct lustre_handle lockh;
        ldlm_policy_data_t policy;
        enum ldlm_mode mode = (l_req_mode == LCK_MINMODE) ?
 -                              (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
 +                            (LCK_CR | LCK_CW | LCK_PR | LCK_PW) : l_req_mode;
        struct lu_fid *fid;
        __u64 flags;
        int i;
@@@ -2880,12 -2888,15 +2880,12 @@@ enum ldlm_mode ll_take_md_lock(struct i
  {
        ldlm_policy_data_t policy = { .l_inodebits = {bits} };
        struct lu_fid *fid;
 -      enum ldlm_mode rc;
  
        fid = &ll_i2info(inode)->lli_fid;
        CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
  
 -      rc = md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
 -                         fid, LDLM_IBITS, &policy, mode, lockh);
 -
 -      return rc;
 +      return md_lock_match(ll_i2mdexp(inode), flags | LDLM_FL_BLOCK_GRANTED,
 +                           fid, LDLM_IBITS, &policy, mode, lockh);
  }
  
  static int ll_inode_revalidate_fini(struct inode *inode, int rc)
@@@ -2938,9 -2949,15 +2938,9 @@@ static int __ll_inode_revalidate(struc
                if (IS_ERR(op_data))
                        return PTR_ERR(op_data);
  
 -              oit.it_create_mode |= M_CHECK_STALE;
 -              rc = md_intent_lock(exp, op_data, NULL, 0,
 -                                  /* we are not interested in name
 -                                   * based lookup
 -                                   */
 -                                  &oit, 0, &req,
 -                                  ll_md_blocking_ast, 0);
 +              rc = md_intent_lock(exp, op_data, &oit, &req,
 +                                  &ll_md_blocking_ast, 0);
                ll_finish_md_op_data(op_data);
 -              oit.it_create_mode &= ~M_CHECK_STALE;
                if (rc < 0) {
                        rc = ll_inode_revalidate_fini(inode, rc);
                        goto out;
                op_data->op_valid = valid;
                rc = md_getattr(sbi->ll_md_exp, op_data, &req);
                ll_finish_md_op_data(op_data);
 -              if (rc) {
 -                      rc = ll_inode_revalidate_fini(inode, rc);
 -                      return rc;
 -              }
 +              if (rc)
 +                      return ll_inode_revalidate_fini(inode, rc);
  
                rc = ll_prep_inode(&inode, req, NULL, NULL);
        }
        return rc;
  }
  
 +static int ll_merge_md_attr(struct inode *inode)
 +{
 +      struct cl_attr attr = { 0 };
 +      int rc;
 +
 +      LASSERT(ll_i2info(inode)->lli_lsm_md);
 +      rc = md_merge_attr(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md,
 +                         &attr, ll_md_blocking_ast);
 +      if (rc)
 +              return rc;
 +
 +      set_nlink(inode, attr.cat_nlink);
 +      inode->i_blocks = attr.cat_blocks;
 +      i_size_write(inode, attr.cat_size);
 +
 +      ll_i2info(inode)->lli_atime = attr.cat_atime;
 +      ll_i2info(inode)->lli_mtime = attr.cat_mtime;
 +      ll_i2info(inode)->lli_ctime = attr.cat_ctime;
 +
 +      return 0;
 +}
 +
  static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
  {
        struct inode *inode = d_inode(dentry);
  
        /* if object isn't regular file, don't validate size */
        if (!S_ISREG(inode->i_mode)) {
 +              if (S_ISDIR(inode->i_mode) &&
 +                  ll_i2info(inode)->lli_lsm_md) {
 +                      rc = ll_merge_md_attr(inode);
 +                      if (rc)
 +                              return rc;
 +              }
 +
                LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_atime;
                LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_mtime;
                LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_ctime;
@@@ -3067,14 -3057,13 +3067,14 @@@ int ll_getattr(struct vfsmount *mnt, st
        if (res)
                return res;
  
 +      OBD_FAIL_TIMEOUT(OBD_FAIL_GETATTR_DELAY, 30);
 +
        stat->dev = inode->i_sb->s_dev;
        if (ll_need_32bit_api(sbi))
                stat->ino = cl_fid_build_ino(&lli->lli_fid, 1);
        else
                stat->ino = inode->i_ino;
        stat->mode = inode->i_mode;
 -      stat->nlink = inode->i_nlink;
        stat->uid = inode->i_uid;
        stat->gid = inode->i_gid;
        stat->rdev = inode->i_rdev;
        stat->ctime = inode->i_ctime;
        stat->blksize = 1 << inode->i_blkbits;
  
 +      stat->nlink = inode->i_nlink;
        stat->size = i_size_read(inode);
        stat->blocks = inode->i_blocks;
  
@@@ -3151,12 -3139,6 +3151,12 @@@ struct posix_acl *ll_get_acl(struct ino
  
  int ll_inode_permission(struct inode *inode, int mask)
  {
 +      struct ll_sb_info *sbi;
 +      struct root_squash_info *squash;
 +      const struct cred *old_cred = NULL;
 +      struct cred *cred = NULL;
 +      bool squash_id = false;
 +      cfs_cap_t cap;
        int rc = 0;
  
        if (mask & MAY_NOT_BLOCK)
        CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n",
               PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask);
  
 +      /* squash fsuid/fsgid if needed */
 +      sbi = ll_i2sbi(inode);
 +      squash = &sbi->ll_squash;
 +      if (unlikely(squash->rsi_uid &&
 +                   uid_eq(current_fsuid(), GLOBAL_ROOT_UID) &&
 +                   !(sbi->ll_flags & LL_SBI_NOROOTSQUASH))) {
 +              squash_id = true;
 +      }
 +
 +      if (squash_id) {
 +              CDEBUG(D_OTHER, "squash creds (%d:%d)=>(%d:%d)\n",
 +                     __kuid_val(current_fsuid()), __kgid_val(current_fsgid()),
 +                     squash->rsi_uid, squash->rsi_gid);
 +
 +              /*
 +               * update current process's credentials
 +               * and FS capability
 +               */
 +              cred = prepare_creds();
 +              if (!cred)
 +                      return -ENOMEM;
 +
 +              cred->fsuid = make_kuid(&init_user_ns, squash->rsi_uid);
 +              cred->fsgid = make_kgid(&init_user_ns, squash->rsi_gid);
 +              for (cap = 0; cap < sizeof(cfs_cap_t) * 8; cap++) {
 +                      if ((1 << cap) & CFS_CAP_FS_MASK)
 +                              cap_lower(cred->cap_effective, cap);
 +              }
 +              old_cred = override_creds(cred);
 +      }
 +
        ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_INODE_PERM, 1);
        rc = generic_permission(inode, mask);
  
 +      /* restore current process's credentials and FS capability */
 +      if (squash_id) {
 +              revert_creds(old_cred);
 +              put_cred(cred);
 +      }
 +
        return rc;
  }
  
@@@ -3228,7 -3173,7 +3228,7 @@@ struct file_operations ll_file_operatio
        .release        = ll_file_release,
        .mmap      = ll_file_mmap,
        .llseek  = ll_file_seek,
 -      .splice_read    = ll_file_splice_read,
 +      .splice_read    = generic_file_splice_read,
        .fsync    = ll_fsync,
        .flush    = ll_flush
  };
@@@ -3241,7 -3186,7 +3241,7 @@@ struct file_operations ll_file_operatio
        .release        = ll_file_release,
        .mmap      = ll_file_mmap,
        .llseek  = ll_file_seek,
 -      .splice_read    = ll_file_splice_read,
 +      .splice_read    = generic_file_splice_read,
        .fsync    = ll_fsync,
        .flush    = ll_flush,
        .flock    = ll_file_flock,
@@@ -3257,7 -3202,7 +3257,7 @@@ struct file_operations ll_file_operatio
        .release        = ll_file_release,
        .mmap      = ll_file_mmap,
        .llseek  = ll_file_seek,
 -      .splice_read    = ll_file_splice_read,
 +      .splice_read    = generic_file_splice_read,
        .fsync    = ll_fsync,
        .flush    = ll_flush,
        .flock    = ll_file_noflock,
@@@ -3268,10 -3213,10 +3268,10 @@@ const struct inode_operations ll_file_i
        .setattr        = ll_setattr,
        .getattr        = ll_getattr,
        .permission     = ll_inode_permission,
 -      .setxattr       = ll_setxattr,
 -      .getxattr       = ll_getxattr,
 +      .setxattr       = generic_setxattr,
 +      .getxattr       = generic_getxattr,
        .listxattr      = ll_listxattr,
 -      .removexattr    = ll_removexattr,
 +      .removexattr    = generic_removexattr,
        .fiemap         = ll_fiemap,
        .get_acl        = ll_get_acl,
  };
@@@ -3306,6 -3251,7 +3306,6 @@@ void *ll_iocontrol_register(llioc_callb
        if (!in_data)
                return NULL;
  
 -      memset(in_data, 0, sizeof(*in_data));
        in_data->iocd_size = size;
        in_data->iocd_cb = cb;
        in_data->iocd_count = count;
@@@ -3443,7 -3389,7 +3443,7 @@@ static int ll_layout_fetch(struct inod
                goto out;
        }
  
 -      lmmsize = body->eadatasize;
 +      lmmsize = body->mbo_eadatasize;
        if (lmmsize == 0) /* empty layout */ {
                rc = 0;
                goto out;
@@@ -3501,7 -3447,7 +3501,7 @@@ static int ll_layout_lock_set(struct lu
                   PFID(&lli->lli_fid), inode, reconf);
  
        /* in case this is a caching lock and reinstate with new inode */
 -      md_set_lock_data(sbi->ll_md_exp, &lockh->cookie, inode, NULL);
 +      md_set_lock_data(sbi->ll_md_exp, lockh, inode, NULL);
  
        lock_res_and_lock(lock);
        lvb_ready = ldlm_is_lvb_ready(lock);
@@@ -3611,8 -3557,8 +3611,8 @@@ int ll_layout_refresh(struct inode *ino
        struct ldlm_enqueue_info einfo = {
                .ei_type = LDLM_IBITS,
                .ei_mode = LCK_CR,
 -              .ei_cb_bl = ll_md_blocking_ast,
 -              .ei_cb_cp = ldlm_completion_ast,
 +              .ei_cb_bl = &ll_md_blocking_ast,
 +              .ei_cb_cp = &ldlm_completion_ast,
        };
        int rc;
  
@@@ -3658,7 -3604,8 +3658,7 @@@ again
                          ll_get_fsname(inode->i_sb, NULL, 0),
                          PFID(&lli->lli_fid), inode);
  
 -      rc = md_enqueue(sbi->ll_md_exp, &einfo, &it, op_data, &lockh,
 -                      NULL, 0, NULL, 0);
 +      rc = md_enqueue(sbi->ll_md_exp, &einfo, NULL, &it, op_data, &lockh, 0);
        ptlrpc_req_finished(it.it_request);
        it.it_request = NULL;
  
@@@ -41,7 -41,7 +41,7 @@@
  #include <linux/types.h>
  #include <linux/mm.h>
  
 -#include "../include/lustre_lite.h"
 +#include "../include/lustre/lustre_ioctl.h"
  #include "../include/lustre_ha.h"
  #include "../include/lustre_dlm.h"
  #include "../include/lprocfs_status.h"
@@@ -115,16 -115,9 +115,16 @@@ static struct ll_sb_info *ll_init_sbi(s
        sbi->ll_sa_max = LL_SA_RPC_DEF;
        atomic_set(&sbi->ll_sa_total, 0);
        atomic_set(&sbi->ll_sa_wrong, 0);
 +      atomic_set(&sbi->ll_sa_running, 0);
        atomic_set(&sbi->ll_agl_total, 0);
        sbi->ll_flags |= LL_SBI_AGL_ENABLED;
  
 +      /* root squash */
 +      sbi->ll_squash.rsi_uid = 0;
 +      sbi->ll_squash.rsi_gid = 0;
 +      INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
 +      init_rwsem(&sbi->ll_squash.rsi_sem);
 +
        sbi->ll_sb = sb;
  
        return sbi;
@@@ -135,8 -128,6 +135,8 @@@ static void ll_free_sbi(struct super_bl
        struct ll_sb_info *sbi = ll_s2sbi(sb);
  
        if (sbi->ll_cache) {
 +              if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
 +                      cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
                cl_cache_decref(sbi->ll_cache);
                sbi->ll_cache = NULL;
        }
@@@ -189,9 -180,7 +189,9 @@@ static int client_common_fill_super(str
                                  OBD_CONNECT_PINGLESS |
                                  OBD_CONNECT_MAX_EASIZE |
                                  OBD_CONNECT_FLOCK_DEAD |
 -                                OBD_CONNECT_DISP_STRIPE;
 +                                OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
 +                                OBD_CONNECT_OPEN_BY_FID |
 +                                OBD_CONNECT_DIR_STRIPE;
  
        if (sbi->ll_flags & LL_SBI_SOM_PREVIEW)
                data->ocd_connect_flags |= OBD_CONNECT_SOM;
                sbi->ll_flags |= LL_SBI_64BIT_HASH;
  
        if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE)
 -              sbi->ll_md_brw_size = data->ocd_brw_size;
 +              sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_SHIFT;
        else
 -              sbi->ll_md_brw_size = PAGE_SIZE;
 +              sbi->ll_md_brw_pages = 1;
  
        if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
                sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
        CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
  
        sb->s_op = &lustre_super_operations;
 +      sb->s_xattr = ll_xattr_handlers;
  #if THREAD_SIZE >= 8192 /*b=17630*/
        sb->s_export_op = &lustre_export_operations;
  #endif
        md_free_lustre_md(sbi->ll_md_exp, &lmd);
        ptlrpc_req_finished(request);
  
 -      if (!(root)) {
 +      if (IS_ERR(root)) {
                if (lmd.lsm)
                        obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm);
  #ifdef CONFIG_FS_POSIX_ACL
        err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CHECKSUM),
                                 KEY_CHECKSUM, sizeof(checksum), &checksum,
                                 NULL);
 +      if (err) {
 +              CERROR("%s: Set checksum failed: rc = %d\n",
 +                     sbi->ll_dt_exp->exp_obd->obd_name, err);
 +              goto out_root;
 +      }
        cl_sb_init(sb);
  
        err = obd_set_info_async(NULL, sbi->ll_dt_exp, sizeof(KEY_CACHE_SET),
                                 KEY_CACHE_SET, sizeof(*sbi->ll_cache),
                                 sbi->ll_cache, NULL);
 +      if (err) {
 +              CERROR("%s: Set cache_set failed: rc = %d\n",
 +                     sbi->ll_dt_exp->exp_obd->obd_name, err);
 +              goto out_root;
 +      }
  
        sb->s_root = d_make_root(root);
        if (!sb->s_root) {
@@@ -582,17 -560,6 +582,17 @@@ int ll_get_max_mdsize(struct ll_sb_inf
        return rc;
  }
  
 +/**
 + * Get the value of the default_easize parameter.
 + *
 + * \see client_obd::cl_default_mds_easize
 + *
 + * \param[in]  sbi    superblock info for this filesystem
 + * \param[out] lmmsize        pointer to storage location for value
 + *
 + * \retval 0          on success
 + * \retval negative   negated errno on failure
 + */
  int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
  {
        int size, rc;
        return rc;
  }
  
 +/**
 + * Set the default_easize parameter to the given value.
 + *
 + * \see client_obd::cl_default_mds_easize
 + *
 + * \param[in] sbi     superblock info for this filesystem
 + * \param[in] lmmsize the size to set
 + *
 + * \retval 0          on success
 + * \retval negative   negated errno on failure
 + */
 +int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
 +{
 +      if (lmmsize < sizeof(struct lov_mds_md) ||
 +          lmmsize > OBD_MAX_DEFAULT_EA_SIZE)
 +              return -EINVAL;
 +
 +      return obd_set_info_async(NULL, sbi->ll_md_exp,
 +                                sizeof(KEY_DEFAULT_EASIZE),
 +                                KEY_DEFAULT_EASIZE,
 +                                sizeof(int), &lmmsize, NULL);
 +}
 +
  static void client_common_put_super(struct super_block *sb)
  {
        struct ll_sb_info *sbi = ll_s2sbi(sb);
@@@ -664,12 -608,6 +664,12 @@@ void ll_kill_super(struct super_block *
        if (sbi) {
                sb->s_dev = sbi->ll_sdev_orig;
                sbi->ll_umounting = 1;
 +
 +              /* wait for running statahead threads to quit */
 +              while (atomic_read(&sbi->ll_sa_running) > 0) {
 +                      set_current_state(TASK_UNINTERRUPTIBLE);
 +                      schedule_timeout(msecs_to_jiffies(MSEC_PER_SEC >> 3));
 +              }
        }
  }
  
@@@ -709,8 -647,7 +709,8 @@@ static int ll_options(char *options, in
                        *flags |= tmp;
                        goto next;
                }
 -              tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
 +              tmp = ll_set_opt("noflock", s1,
 +                               LL_SBI_FLOCK | LL_SBI_LOCALFLOCK);
                if (tmp) {
                        *flags &= ~tmp;
                        goto next;
@@@ -835,13 -772,11 +835,13 @@@ void ll_lli_init(struct ll_inode_info *
                lli->lli_sai = NULL;
                spin_lock_init(&lli->lli_sa_lock);
                lli->lli_opendir_pid = 0;
 +              lli->lli_sa_enabled = 0;
 +              lli->lli_def_stripe_offset = -1;
        } else {
                mutex_init(&lli->lli_size_mutex);
                lli->lli_symlink_name = NULL;
                init_rwsem(&lli->lli_trunc_sem);
 -              mutex_init(&lli->lli_write_mutex);
 +              range_lock_tree_init(&lli->lli_write_tree);
                init_rwsem(&lli->lli_glimpse_sem);
                lli->lli_glimpse_time = 0;
                INIT_LIST_HEAD(&lli->lli_agl_list);
@@@ -961,8 -896,7 +961,8 @@@ void ll_put_super(struct super_block *s
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct ll_sb_info *sbi = ll_s2sbi(sb);
        char *profilenm = get_profile_name(sb);
 -      int ccc_count, next, force = 1, rc = 0;
 +      int next, force = 1, rc = 0;
 +      long ccc_count;
  
        CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm);
  
                struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL);
  
                rc = l_wait_event(sbi->ll_cache->ccc_unstable_waitq,
 -                                !atomic_read(&sbi->ll_cache->ccc_unstable_nr),
 +                                !atomic_long_read(&sbi->ll_cache->ccc_unstable_nr),
                                  &lwi);
        }
  
 -      ccc_count = atomic_read(&sbi->ll_cache->ccc_unstable_nr);
 +      ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
        if (!force && rc != -EINTR)
 -              LASSERTF(!ccc_count, "count: %i\n", ccc_count);
 +              LASSERTF(!ccc_count, "count: %li\n", ccc_count);
  
        /* We need to set force before the lov_disconnect in
         * lustre_common_put_super, since l_d cleans up osc's as well.
@@@ -1057,206 -991,6 +1057,206 @@@ struct inode *ll_inode_from_resource_lo
        return inode;
  }
  
 +static void ll_dir_clear_lsm_md(struct inode *inode)
 +{
 +      struct ll_inode_info *lli = ll_i2info(inode);
 +
 +      LASSERT(S_ISDIR(inode->i_mode));
 +
 +      if (lli->lli_lsm_md) {
 +              lmv_free_memmd(lli->lli_lsm_md);
 +              lli->lli_lsm_md = NULL;
 +      }
 +}
 +
 +static struct inode *ll_iget_anon_dir(struct super_block *sb,
 +                                    const struct lu_fid *fid,
 +                                    struct lustre_md *md)
 +{
 +      struct ll_sb_info *sbi = ll_s2sbi(sb);
 +      struct mdt_body *body = md->body;
 +      struct inode *inode;
 +      ino_t ino;
 +
 +      ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API);
 +      inode = iget_locked(sb, ino);
 +      if (!inode) {
 +              CERROR("%s: failed get simple inode "DFID": rc = -ENOENT\n",
 +                     ll_get_fsname(sb, NULL, 0), PFID(fid));
 +              return ERR_PTR(-ENOENT);
 +      }
 +
 +      if (inode->i_state & I_NEW) {
 +              struct ll_inode_info *lli = ll_i2info(inode);
 +              struct lmv_stripe_md *lsm = md->lmv;
 +
 +              inode->i_mode = (inode->i_mode & ~S_IFMT) |
 +                              (body->mbo_mode & S_IFMT);
 +              LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n",
 +                       PFID(fid));
 +
 +              LTIME_S(inode->i_mtime) = 0;
 +              LTIME_S(inode->i_atime) = 0;
 +              LTIME_S(inode->i_ctime) = 0;
 +              inode->i_rdev = 0;
 +
 +              inode->i_op = &ll_dir_inode_operations;
 +              inode->i_fop = &ll_dir_operations;
 +              lli->lli_fid = *fid;
 +              ll_lli_init(lli);
 +
 +              LASSERT(lsm);
 +              /* master object FID */
 +              lli->lli_pfid = body->mbo_fid1;
 +              CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n",
 +                     lli, PFID(fid), PFID(&lli->lli_pfid));
 +              unlock_new_inode(inode);
 +      }
 +
 +      return inode;
 +}
 +
 +static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
 +{
 +      struct lmv_stripe_md *lsm = md->lmv;
 +      struct lu_fid *fid;
 +      int i;
 +
 +      LASSERT(lsm);
 +      /*
 +       * XXX sigh, this lsm_root initialization should be in
 +       * LMV layer, but it needs ll_iget right now, so we
 +       * put this here right now.
 +       */
 +      for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
 +              fid = &lsm->lsm_md_oinfo[i].lmo_fid;
 +              LASSERT(!lsm->lsm_md_oinfo[i].lmo_root);
 +              /* Unfortunately ll_iget will call ll_update_inode,
 +               * where the initialization of slave inode is slightly
 +               * different, so it resets lsm_md to NULL to avoid
 +               * initializing lsm for slave inode.
 +               */
 +              /* For migrating inode, master stripe and master object will
 +               * be the same, so we only need to assign this inode
 +               */
 +              if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && !i)
 +                      lsm->lsm_md_oinfo[i].lmo_root = inode;
 +              else
 +                      lsm->lsm_md_oinfo[i].lmo_root =
 +                              ll_iget_anon_dir(inode->i_sb, fid, md);
 +              if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) {
 +                      int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root);
 +
 +                      lsm->lsm_md_oinfo[i].lmo_root = NULL;
 +                      return rc;
 +              }
 +      }
 +
 +      return 0;
 +}
 +
 +static inline int lli_lsm_md_eq(const struct lmv_stripe_md *lsm_md1,
 +                              const struct lmv_stripe_md *lsm_md2)
 +{
 +      return lsm_md1->lsm_md_magic == lsm_md2->lsm_md_magic &&
 +             lsm_md1->lsm_md_stripe_count == lsm_md2->lsm_md_stripe_count &&
 +             lsm_md1->lsm_md_master_mdt_index ==
 +                      lsm_md2->lsm_md_master_mdt_index &&
 +             lsm_md1->lsm_md_hash_type == lsm_md2->lsm_md_hash_type &&
 +             lsm_md1->lsm_md_layout_version ==
 +                      lsm_md2->lsm_md_layout_version &&
 +             !strcmp(lsm_md1->lsm_md_pool_name,
 +                     lsm_md2->lsm_md_pool_name);
 +}
 +
 +static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
 +{
 +      struct ll_inode_info *lli = ll_i2info(inode);
 +      struct lmv_stripe_md *lsm = md->lmv;
 +      int rc;
 +
 +      LASSERT(S_ISDIR(inode->i_mode));
 +      CDEBUG(D_INODE, "update lsm %p of "DFID"\n", lli->lli_lsm_md,
 +             PFID(ll_inode2fid(inode)));
 +
 +      /* no striped information from request. */
 +      if (!lsm) {
 +              if (!lli->lli_lsm_md) {
 +                      return 0;
 +              } else if (lli->lli_lsm_md->lsm_md_hash_type &
 +                         LMV_HASH_FLAG_MIGRATION) {
 +                      /*
 +                       * migration is done, the temporary MIGRATE layout has
 +                       * been removed
 +                       */
 +                      CDEBUG(D_INODE, DFID" finish migration.\n",
 +                             PFID(ll_inode2fid(inode)));
 +                      lmv_free_memmd(lli->lli_lsm_md);
 +                      lli->lli_lsm_md = NULL;
 +                      return 0;
 +              } else {
 +                      /*
 +                       * The lustre_md from req does not include stripeEA,
 +                       * see ll_md_setattr
 +                       */
 +                      return 0;
 +              }
 +      }
 +
 +      /* set the directory layout */
 +      if (!lli->lli_lsm_md) {
 +              rc = ll_init_lsm_md(inode, md);
 +              if (rc)
 +                      return rc;
 +
 +              lli->lli_lsm_md = lsm;
 +              /*
 +               * set lsm_md to NULL, so the following free lustre_md
 +               * will not free this lsm
 +               */
 +              md->lmv = NULL;
 +              CDEBUG(D_INODE, "Set lsm %p magic %x to "DFID"\n", lsm,
 +                     lsm->lsm_md_magic, PFID(ll_inode2fid(inode)));
 +              return 0;
 +      }
 +
 +      /* Compare the old and new stripe information */
 +      if (!lsm_md_eq(lli->lli_lsm_md, lsm)) {
 +              struct lmv_stripe_md *old_lsm = lli->lli_lsm_md;
 +              int idx;
 +
 +              CERROR("%s: inode "DFID"(%p)'s lmv layout mismatch (%p)/(%p) magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n",
 +                     ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid),
 +                     inode, lsm, old_lsm,
 +                     lsm->lsm_md_magic, old_lsm->lsm_md_magic,
 +                     lsm->lsm_md_stripe_count,
 +                     old_lsm->lsm_md_stripe_count,
 +                     lsm->lsm_md_master_mdt_index,
 +                     old_lsm->lsm_md_master_mdt_index,
 +                     lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type,
 +                     lsm->lsm_md_layout_version,
 +                     old_lsm->lsm_md_layout_version,
 +                     lsm->lsm_md_pool_name,
 +                     old_lsm->lsm_md_pool_name);
 +
 +              for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) {
 +                      CERROR("%s: sub FIDs in old lsm idx %d, old: "DFID"\n",
 +                             ll_get_fsname(inode->i_sb, NULL, 0), idx,
 +                             PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid));
 +              }
 +
 +              for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) {
 +                      CERROR("%s: sub FIDs in new lsm idx %d, new: "DFID"\n",
 +                             ll_get_fsname(inode->i_sb, NULL, 0), idx,
 +                             PFID(&lsm->lsm_md_oinfo[idx].lmo_fid));
 +              }
 +
 +              return -EIO;
 +      }
 +
 +      return 0;
 +}
 +
  void ll_clear_inode(struct inode *inode)
  {
        struct ll_inode_info *lli = ll_i2info(inode);
  
  #ifdef CONFIG_FS_POSIX_ACL
        if (lli->lli_posix_acl) {
 -              LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1);
                posix_acl_release(lli->lli_posix_acl);
                lli->lli_posix_acl = NULL;
        }
  #endif
        lli->lli_inode_magic = LLI_INODE_DEAD;
  
 -      if (!S_ISDIR(inode->i_mode))
 +      if (S_ISDIR(inode->i_mode))
 +              ll_dir_clear_lsm_md(inode);
 +      if (S_ISREG(inode->i_mode) && !is_bad_inode(inode))
                LASSERT(list_empty(&lli->lli_agl_list));
  
        /*
@@@ -1370,10 -1103,10 +1370,10 @@@ static int ll_md_setattr(struct dentry 
        op_data->op_attr.ia_valid = ia_valid;
  
        /* Extract epoch data if obtained. */
 -      op_data->op_handle = md.body->handle;
 -      op_data->op_ioepoch = md.body->ioepoch;
 +      op_data->op_handle = md.body->mbo_handle;
 +      op_data->op_ioepoch = md.body->mbo_ioepoch;
  
 -      ll_update_inode(inode, &md);
 +      rc = ll_update_inode(inode, &md);
        ptlrpc_req_finished(request);
  
        return rc;
@@@ -1405,8 -1138,8 +1405,8 @@@ static int ll_setattr_done_writing(stru
                rc = ll_som_update(inode, op_data);
        else if (rc) {
                CERROR("%s: inode "DFID" mdc truncate failed: rc = %d\n",
 -                    ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
 -                    PFID(ll_inode2fid(inode)), rc);
 +                     ll_i2sbi(inode)->ll_md_exp->exp_obd->obd_name,
 +                     PFID(ll_inode2fid(inode)), rc);
        }
        return rc;
  }
@@@ -1459,7 -1192,7 +1459,7 @@@ int ll_setattr_raw(struct dentry *dentr
                attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
        }
  
-       /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
+       /* POSIX: check before ATTR_*TIME_SET set (from setattr_prepare) */
        if (attr->ia_valid & TIMES_SET_FLAGS) {
                if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
                    !capable(CFS_CAP_FOWNER))
@@@ -1598,14 -1331,14 +1598,14 @@@ int ll_setattr(struct dentry *de, struc
  {
        int mode = d_inode(de)->i_mode;
  
 -      if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
 -                            (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
 +      if ((attr->ia_valid & (ATTR_CTIME | ATTR_SIZE | ATTR_MODE)) ==
 +                            (ATTR_CTIME | ATTR_SIZE | ATTR_MODE))
                attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE;
  
 -      if (((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
 -                             (ATTR_SIZE|ATTR_MODE)) &&
 +      if (((attr->ia_valid & (ATTR_MODE | ATTR_FORCE | ATTR_SIZE)) ==
 +                             (ATTR_SIZE | ATTR_MODE)) &&
            (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
 -           (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
 +           (((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) &&
              !(attr->ia_mode & S_ISGID))))
                attr->ia_valid |= ATTR_FORCE;
  
                attr->ia_valid |= ATTR_KILL_SUID;
  
        if ((attr->ia_valid & ATTR_MODE) &&
 -          ((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
 +          ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) &&
            !(attr->ia_mode & S_ISGID) &&
            !(attr->ia_valid & ATTR_KILL_SGID))
                attr->ia_valid |= ATTR_KILL_SGID;
@@@ -1732,14 -1465,14 +1732,14 @@@ void ll_inode_size_unlock(struct inode 
        mutex_unlock(&lli->lli_size_mutex);
  }
  
 -void ll_update_inode(struct inode *inode, struct lustre_md *md)
 +int ll_update_inode(struct inode *inode, struct lustre_md *md)
  {
        struct ll_inode_info *lli = ll_i2info(inode);
        struct mdt_body *body = md->body;
        struct lov_stripe_md *lsm = md->lsm;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
  
 -      LASSERT((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
 +      LASSERT((lsm != NULL) == ((body->mbo_valid & OBD_MD_FLEASIZE) != 0));
        if (lsm) {
                if (!lli->lli_has_smd &&
                    !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
                        lli->lli_maxbytes = MAX_LFS_FILESIZE;
        }
  
 +      if (S_ISDIR(inode->i_mode)) {
 +              int rc;
 +
 +              rc = ll_update_lsm_md(inode, md);
 +              if (rc)
 +                      return rc;
 +      }
 +
  #ifdef CONFIG_FS_POSIX_ACL
 -      if (body->valid & OBD_MD_FLACL) {
 +      if (body->mbo_valid & OBD_MD_FLACL) {
                spin_lock(&lli->lli_lock);
                if (lli->lli_posix_acl)
                        posix_acl_release(lli->lli_posix_acl);
                spin_unlock(&lli->lli_lock);
        }
  #endif
 -      inode->i_ino = cl_fid_build_ino(&body->fid1,
 +      inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
                                        sbi->ll_flags & LL_SBI_32BIT_API);
 -      inode->i_generation = cl_fid_build_gen(&body->fid1);
 +      inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
  
 -      if (body->valid & OBD_MD_FLATIME) {
 -              if (body->atime > LTIME_S(inode->i_atime))
 -                      LTIME_S(inode->i_atime) = body->atime;
 -              lli->lli_atime = body->atime;
 +      if (body->mbo_valid & OBD_MD_FLATIME) {
 +              if (body->mbo_atime > LTIME_S(inode->i_atime))
 +                      LTIME_S(inode->i_atime) = body->mbo_atime;
 +              lli->lli_atime = body->mbo_atime;
        }
 -      if (body->valid & OBD_MD_FLMTIME) {
 -              if (body->mtime > LTIME_S(inode->i_mtime)) {
 +      if (body->mbo_valid & OBD_MD_FLMTIME) {
 +              if (body->mbo_mtime > LTIME_S(inode->i_mtime)) {
                        CDEBUG(D_INODE, "setting ino %lu mtime from %lu to %llu\n",
                               inode->i_ino, LTIME_S(inode->i_mtime),
 -                             body->mtime);
 -                      LTIME_S(inode->i_mtime) = body->mtime;
 +                             body->mbo_mtime);
 +                      LTIME_S(inode->i_mtime) = body->mbo_mtime;
                }
 -              lli->lli_mtime = body->mtime;
 -      }
 -      if (body->valid & OBD_MD_FLCTIME) {
 -              if (body->ctime > LTIME_S(inode->i_ctime))
 -                      LTIME_S(inode->i_ctime) = body->ctime;
 -              lli->lli_ctime = body->ctime;
 -      }
 -      if (body->valid & OBD_MD_FLMODE)
 -              inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT);
 -      if (body->valid & OBD_MD_FLTYPE)
 -              inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT);
 +              lli->lli_mtime = body->mbo_mtime;
 +      }
 +      if (body->mbo_valid & OBD_MD_FLCTIME) {
 +              if (body->mbo_ctime > LTIME_S(inode->i_ctime))
 +                      LTIME_S(inode->i_ctime) = body->mbo_ctime;
 +              lli->lli_ctime = body->mbo_ctime;
 +      }
 +      if (body->mbo_valid & OBD_MD_FLMODE)
 +              inode->i_mode = (inode->i_mode & S_IFMT) |
 +                              (body->mbo_mode & ~S_IFMT);
 +      if (body->mbo_valid & OBD_MD_FLTYPE)
 +              inode->i_mode = (inode->i_mode & ~S_IFMT) |
 +                              (body->mbo_mode & S_IFMT);
        LASSERT(inode->i_mode != 0);
        if (S_ISREG(inode->i_mode))
                inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1,
                                       LL_MAX_BLKSIZE_BITS);
        else
                inode->i_blkbits = inode->i_sb->s_blocksize_bits;
 -      if (body->valid & OBD_MD_FLUID)
 -              inode->i_uid = make_kuid(&init_user_ns, body->uid);
 -      if (body->valid & OBD_MD_FLGID)
 -              inode->i_gid = make_kgid(&init_user_ns, body->gid);
 -      if (body->valid & OBD_MD_FLFLAGS)
 -              inode->i_flags = ll_ext_to_inode_flags(body->flags);
 -      if (body->valid & OBD_MD_FLNLINK)
 -              set_nlink(inode, body->nlink);
 -      if (body->valid & OBD_MD_FLRDEV)
 -              inode->i_rdev = old_decode_dev(body->rdev);
 -
 -      if (body->valid & OBD_MD_FLID) {
 +      if (body->mbo_valid & OBD_MD_FLUID)
 +              inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
 +      if (body->mbo_valid & OBD_MD_FLGID)
 +              inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
 +      if (body->mbo_valid & OBD_MD_FLFLAGS)
 +              inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags);
 +      if (body->mbo_valid & OBD_MD_FLNLINK)
 +              set_nlink(inode, body->mbo_nlink);
 +      if (body->mbo_valid & OBD_MD_FLRDEV)
 +              inode->i_rdev = old_decode_dev(body->mbo_rdev);
 +
 +      if (body->mbo_valid & OBD_MD_FLID) {
                /* FID shouldn't be changed! */
                if (fid_is_sane(&lli->lli_fid)) {
 -                      LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1),
 +                      LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1),
                                 "Trying to change FID "DFID" to the "DFID", inode "DFID"(%p)\n",
 -                               PFID(&lli->lli_fid), PFID(&body->fid1),
 +                               PFID(&lli->lli_fid), PFID(&body->mbo_fid1),
                                 PFID(ll_inode2fid(inode)), inode);
                } else {
 -                      lli->lli_fid = body->fid1;
 +                      lli->lli_fid = body->mbo_fid1;
                }
        }
  
        LASSERT(fid_seq(&lli->lli_fid) != 0);
  
 -      if (body->valid & OBD_MD_FLSIZE) {
 +      if (body->mbo_valid & OBD_MD_FLSIZE) {
                if (exp_connect_som(ll_i2mdexp(inode)) &&
                    S_ISREG(inode->i_mode)) {
                        struct lustre_handle lockh;
                                        /* Use old size assignment to avoid
                                         * deadlock bz14138 & bz14326
                                         */
 -                                      i_size_write(inode, body->size);
 +                                      i_size_write(inode, body->mbo_size);
                                        spin_lock(&lli->lli_lock);
                                        lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
                                        spin_unlock(&lli->lli_lock);
                        /* Use old size assignment to avoid
                         * deadlock bz14138 & bz14326
                         */
 -                      i_size_write(inode, body->size);
 +                      i_size_write(inode, body->mbo_size);
  
                        CDEBUG(D_VFSTRACE, "inode=%lu, updating i_size %llu\n",
 -                             inode->i_ino, (unsigned long long)body->size);
 +                             inode->i_ino, (unsigned long long)body->mbo_size);
                }
  
 -              if (body->valid & OBD_MD_FLBLOCKS)
 -                      inode->i_blocks = body->blocks;
 +              if (body->mbo_valid & OBD_MD_FLBLOCKS)
 +                      inode->i_blocks = body->mbo_blocks;
        }
  
 -      if (body->valid & OBD_MD_TSTATE) {
 -              if (body->t_state & MS_RESTORE)
 +      if (body->mbo_valid & OBD_MD_TSTATE) {
 +              if (body->mbo_t_state & MS_RESTORE)
                        lli->lli_flags |= LLIF_FILE_RESTORING;
        }
 +
 +      return 0;
  }
  
 -void ll_read_inode2(struct inode *inode, void *opaque)
 +int ll_read_inode2(struct inode *inode, void *opaque)
  {
        struct lustre_md *md = opaque;
        struct ll_inode_info *lli = ll_i2info(inode);
 +      int rc;
  
        CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
               PFID(&lli->lli_fid), inode);
        LTIME_S(inode->i_atime) = 0;
        LTIME_S(inode->i_ctime) = 0;
        inode->i_rdev = 0;
 -      ll_update_inode(inode, md);
 +      rc = ll_update_inode(inode, md);
 +      if (rc)
 +              return rc;
  
        /* OIDEBUG(inode); */
  
                init_special_inode(inode, inode->i_mode,
                                   inode->i_rdev);
        }
 +
 +      return 0;
  }
  
  void ll_delete_inode(struct inode *inode)
                 * osc_extent implementation at LU-1030.
                 */
                cl_sync_file_range(inode, 0, OBD_OBJECT_EOF,
 -                                 CL_FSYNC_DISCARD, 1);
 +                                 CL_FSYNC_LOCAL, 1);
  
        truncate_inode_pages_final(&inode->i_data);
  
 -      /* Workaround for LU-118 */
 -      if (inode->i_data.nrpages) {
 -              spin_lock_irq(&inode->i_data.tree_lock);
 -              spin_unlock_irq(&inode->i_data.tree_lock);
 -              LASSERTF(inode->i_data.nrpages == 0,
 -                       "inode="DFID"(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
 -                       PFID(ll_inode2fid(inode)), inode,
 -                       inode->i_data.nrpages);
 -      }
 -      /* Workaround end */
 +      LASSERTF(!inode->i_data.nrpages,
 +               "inode=" DFID "(%p) nrpages=%lu, see http://jira.whamcloud.com/browse/LU-118\n",
 +               PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages);
  
        ll_clear_inode(inode);
        clear_inode(inode);
@@@ -1981,7 -1704,7 +1981,7 @@@ int ll_iocontrol(struct inode *inode, s
  
                body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
  
 -              flags = body->flags;
 +              flags = body->mbo_flags;
  
                ptlrpc_req_finished(req);
  
@@@ -2163,9 -1886,9 +2163,9 @@@ void ll_open_cleanup(struct super_bloc
        if (!op_data)
                return;
  
 -      op_data->op_fid1 = body->fid1;
 -      op_data->op_ioepoch = body->ioepoch;
 -      op_data->op_handle = body->handle;
 +      op_data->op_fid1 = body->mbo_fid1;
 +      op_data->op_ioepoch = body->mbo_ioepoch;
 +      op_data->op_handle = body->mbo_handle;
        op_data->op_mod_time = get_seconds();
        md_close(exp, op_data, NULL, &close_req);
        ptlrpc_req_finished(close_req);
@@@ -2187,9 -1910,7 +2187,9 @@@ int ll_prep_inode(struct inode **inode
                goto cleanup;
  
        if (*inode) {
 -              ll_update_inode(*inode, &md);
 +              rc = ll_update_inode(*inode, &md);
 +              if (rc)
 +                      goto out;
        } else {
                LASSERT(sb);
  
                 * At this point server returns to client's same fid as client
                 * generated for creating. So using ->fid1 is okay here.
                 */
 -              if (!fid_is_sane(&md.body->fid1)) {
 +              if (!fid_is_sane(&md.body->mbo_fid1)) {
                        CERROR("%s: Fid is insane " DFID "\n",
                               ll_get_fsname(sb, NULL, 0),
 -                             PFID(&md.body->fid1));
 +                             PFID(&md.body->mbo_fid1));
                        rc = -EINVAL;
                        goto out;
                }
  
 -              *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1,
 +              *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
                                             sbi->ll_flags & LL_SBI_32BIT_API),
                                 &md);
 -              if (!*inode) {
 +              if (IS_ERR(*inode)) {
  #ifdef CONFIG_FS_POSIX_ACL
                        if (md.posix_acl) {
                                posix_acl_release(md.posix_acl);
@@@ -2354,20 -2075,11 +2354,20 @@@ int ll_process_config(struct lustre_cf
  /* this function prepares md_op_data hint for passing it down to MD stack. */
  struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
                                      struct inode *i1, struct inode *i2,
 -                                    const char *name, int namelen,
 -                                    int mode, __u32 opc, void *data)
 +                                    const char *name, size_t namelen,
 +                                    u32 mode, __u32 opc, void *data)
  {
 -      if (namelen > ll_i2sbi(i1)->ll_namelen)
 -              return ERR_PTR(-ENAMETOOLONG);
 +      if (!name) {
 +              /* Do not reuse namelen for something else. */
 +              if (namelen)
 +                      return ERR_PTR(-EINVAL);
 +      } else {
 +              if (namelen > ll_i2sbi(i1)->ll_namelen)
 +                      return ERR_PTR(-ENAMETOOLONG);
 +
 +              if (!lu_name_is_valid_2(name, namelen))
 +                      return ERR_PTR(-EINVAL);
 +      }
  
        if (!op_data)
                op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
  
        ll_i2gids(op_data->op_suppgids, i1, i2);
        op_data->op_fid1 = *ll_inode2fid(i1);
 +      op_data->op_default_stripe_offset = -1;
 +      if (S_ISDIR(i1->i_mode)) {
 +              op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
 +              op_data->op_default_stripe_offset =
 +                      ll_i2info(i1)->lli_def_stripe_offset;
 +      }
  
 -      if (i2)
 +      if (i2) {
                op_data->op_fid2 = *ll_inode2fid(i2);
 -      else
 +              if (S_ISDIR(i2->i_mode))
 +                      op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
 +      } else {
                fid_zero(&op_data->op_fid2);
 +      }
 +
 +      if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
 +              op_data->op_cli_flags |= CLI_HASH64;
 +
 +      if (ll_need_32bit_api(ll_i2sbi(i1)))
 +              op_data->op_cli_flags |= CLI_API32;
  
        op_data->op_name = name;
        op_data->op_namelen = namelen;
        op_data->op_bias = 0;
        op_data->op_cli_flags = 0;
        if ((opc == LUSTRE_OPC_CREATE) && name &&
 -          filename_is_volatile(name, namelen, NULL))
 +          filename_is_volatile(name, namelen, &op_data->op_mds))
                op_data->op_bias |= MDS_CREATE_VOLATILE;
 -      op_data->op_opc = opc;
 -      op_data->op_mds = 0;
 +      else
 +              op_data->op_mds = 0;
        op_data->op_data = data;
  
 -      /* If the file is being opened after mknod() (normally due to NFS)
 -       * try to use the default stripe data from parent directory for
 -       * allocating OST objects.  Try to pass the parent FID to MDS.
 -       */
 -      if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) &&
 -          !ll_i2info(i2)->lli_has_smd) {
 -              struct ll_inode_info *lli = ll_i2info(i2);
 -
 -              spin_lock(&lli->lli_lock);
 -              if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid)))
 -                      op_data->op_fid1 = lli->lli_pfid;
 -              spin_unlock(&lli->lli_lock);
 -      }
 -
        /* When called by ll_setattr_raw, file is i1. */
        if (ll_i2info(i1)->lli_flags & LLIF_DATA_MODIFIED)
                op_data->op_bias |= MDS_DATA_MODIFIED;
@@@ -2540,197 -2251,3 +2540,197 @@@ void ll_dirty_page_discard_warn(struct 
        if (buf)
                free_page((unsigned long)buf);
  }
 +
 +ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
 +                      struct lov_user_md **kbuf)
 +{
 +      struct lov_user_md lum;
 +      ssize_t lum_size;
 +
 +      if (copy_from_user(&lum, md, sizeof(lum))) {
 +              lum_size = -EFAULT;
 +              goto no_kbuf;
 +      }
 +
 +      lum_size = ll_lov_user_md_size(&lum);
 +      if (lum_size < 0)
 +              goto no_kbuf;
 +
 +      *kbuf = kzalloc(lum_size, GFP_NOFS);
 +      if (!*kbuf) {
 +              lum_size = -ENOMEM;
 +              goto no_kbuf;
 +      }
 +
 +      if (copy_from_user(*kbuf, md, lum_size) != 0) {
 +              kfree(*kbuf);
 +              *kbuf = NULL;
 +              lum_size = -EFAULT;
 +      }
 +no_kbuf:
 +      return lum_size;
 +}
 +
 +/*
 + * Compute llite root squash state after a change of root squash
 + * configuration setting or add/remove of a lnet nid
 + */
 +void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
 +{
 +      struct root_squash_info *squash = &sbi->ll_squash;
 +      lnet_process_id_t id;
 +      bool matched;
 +      int i;
 +
 +      /* Update norootsquash flag */
 +      down_write(&squash->rsi_sem);
 +      if (list_empty(&squash->rsi_nosquash_nids)) {
 +              sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
 +      } else {
 +              /*
 +               * Do not apply root squash as soon as one of our NIDs is
 +               * in the nosquash_nids list
 +               */
 +              matched = false;
 +              i = 0;
 +
 +              while (LNetGetId(i++, &id) != -ENOENT) {
 +                      if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND)
 +                              continue;
 +                      if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
 +                              matched = true;
 +                              break;
 +                      }
 +              }
 +              if (matched)
 +                      sbi->ll_flags |= LL_SBI_NOROOTSQUASH;
 +              else
 +                      sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
 +      }
 +      up_write(&squash->rsi_sem);
 +}
 +
 +/**
 + * Parse linkea content to extract information about a given hardlink
 + *
 + * \param[in] ldata           - Initialized linkea data
 + * \param[in] linkno          - Link identifier
 + * \param[out]        parent_fid      - The entry's parent FID
 + * \param[out]        ln              - Entry name destination buffer
 + *
 + * \retval 0 on success
 + * \retval Appropriate negative error code on failure
 + */
 +static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno,
 +                          struct lu_fid *parent_fid, struct lu_name *ln)
 +{
 +      unsigned int idx;
 +      int rc;
 +
 +      rc = linkea_init(ldata);
 +      if (rc < 0)
 +              return rc;
 +
 +      if (linkno >= ldata->ld_leh->leh_reccount)
 +              /* beyond last link */
 +              return -ENODATA;
 +
 +      linkea_first_entry(ldata);
 +      for (idx = 0; ldata->ld_lee; idx++) {
 +              linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln,
 +                                  parent_fid);
 +              if (idx == linkno)
 +                      break;
 +
 +              linkea_next_entry(ldata);
 +      }
 +
 +      if (idx < linkno)
 +              return -ENODATA;
 +
 +      return 0;
 +}
 +
 +/**
 + * Get parent FID and name of an identified link. Operation is performed for
 + * a given link number, letting the caller iterate over linkno to list one or
 + * all links of an entry.
 + *
 + * \param[in]   file  - File descriptor against which to perform the operation
 + * \param[in,out] arg - User-filled structure containing the linkno to operate
 + *                      on and the available size. It is eventually filled with
 + *                      the requested information or left untouched on error
 + *
 + * \retval - 0 on success
 + * \retval - Appropriate negative error code on failure
 + */
 +int ll_getparent(struct file *file, struct getparent __user *arg)
 +{
 +      struct inode *inode = file_inode(file);
 +      struct linkea_data *ldata;
 +      struct lu_fid parent_fid;
 +      struct lu_buf buf = {
 +              .lb_buf = NULL,
 +              .lb_len = 0
 +      };
 +      struct lu_name ln;
 +      u32 name_size;
 +      u32 linkno;
 +      int rc;
 +
 +      if (!capable(CFS_CAP_DAC_READ_SEARCH) &&
 +          !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
 +              return -EPERM;
 +
 +      if (get_user(name_size, &arg->gp_name_size))
 +              return -EFAULT;
 +
 +      if (get_user(linkno, &arg->gp_linkno))
 +              return -EFAULT;
 +
 +      if (name_size > PATH_MAX)
 +              return -EINVAL;
 +
 +      ldata = kzalloc(sizeof(*ldata), GFP_NOFS);
 +      if (!ldata)
 +              return -ENOMEM;
 +
 +      rc = linkea_data_new(ldata, &buf);
 +      if (rc < 0)
 +              goto ldata_free;
 +
 +      rc = ll_xattr_list(inode, XATTR_NAME_LINK, XATTR_TRUSTED_T, buf.lb_buf,
 +                         buf.lb_len, OBD_MD_FLXATTR);
 +      if (rc < 0)
 +              goto lb_free;
 +
 +      rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln);
 +      if (rc < 0)
 +              goto lb_free;
 +
 +      if (ln.ln_namelen >= name_size) {
 +              rc = -EOVERFLOW;
 +              goto lb_free;
 +      }
 +
 +      if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid))) {
 +              rc = -EFAULT;
 +              goto lb_free;
 +      }
 +
 +      if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen)) {
 +              rc = -EFAULT;
 +              goto lb_free;
 +      }
 +
 +      if (put_user('\0', arg->gp_name + ln.ln_namelen)) {
 +              rc = -EFAULT;
 +              goto lb_free;
 +      }
 +
 +lb_free:
 +      lu_buf_free(&buf);
 +ldata_free:
 +      kfree(ldata);
 +      return rc;
 +}
@@@ -190,30 -190,28 +190,30 @@@ void lustre_assert_wire_constants(void
                 (long long)REINT_SETXATTR);
        LASSERTF(REINT_RMENTRY == 8, "found %lld\n",
                 (long long)REINT_RMENTRY);
 -      LASSERTF(REINT_MAX == 9, "found %lld\n",
 +      LASSERTF(REINT_MIGRATE == 9, "found %lld\n",
 +               (long long)REINT_MIGRATE);
 +      LASSERTF(REINT_MAX == 10, "found %lld\n",
                 (long long)REINT_MAX);
        LASSERTF(DISP_IT_EXECD == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_IT_EXECD);
 +               (unsigned)DISP_IT_EXECD);
        LASSERTF(DISP_LOOKUP_EXECD == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_LOOKUP_EXECD);
 +               (unsigned)DISP_LOOKUP_EXECD);
        LASSERTF(DISP_LOOKUP_NEG == 0x00000004UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_LOOKUP_NEG);
 +               (unsigned)DISP_LOOKUP_NEG);
        LASSERTF(DISP_LOOKUP_POS == 0x00000008UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_LOOKUP_POS);
 +               (unsigned)DISP_LOOKUP_POS);
        LASSERTF(DISP_OPEN_CREATE == 0x00000010UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_OPEN_CREATE);
 +               (unsigned)DISP_OPEN_CREATE);
        LASSERTF(DISP_OPEN_OPEN == 0x00000020UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_OPEN_OPEN);
 +               (unsigned)DISP_OPEN_OPEN);
        LASSERTF(DISP_ENQ_COMPLETE == 0x00400000UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_ENQ_COMPLETE);
 +               (unsigned)DISP_ENQ_COMPLETE);
        LASSERTF(DISP_ENQ_OPEN_REF == 0x00800000UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_ENQ_OPEN_REF);
 +               (unsigned)DISP_ENQ_OPEN_REF);
        LASSERTF(DISP_ENQ_CREATE_REF == 0x01000000UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_ENQ_CREATE_REF);
 +               (unsigned)DISP_ENQ_CREATE_REF);
        LASSERTF(DISP_OPEN_LOCK == 0x02000000UL, "found 0x%.8xUL\n",
 -              (unsigned)DISP_OPEN_LOCK);
 +               (unsigned)DISP_OPEN_LOCK);
        LASSERTF(MDS_STATUS_CONN == 1, "found %lld\n",
                 (long long)MDS_STATUS_CONN);
        LASSERTF(MDS_STATUS_LOV == 2, "found %lld\n",
        LASSERTF(LUSTRE_BFLAG_UNCOMMITTED_WRITES == 1, "found %lld\n",
                 (long long)LUSTRE_BFLAG_UNCOMMITTED_WRITES);
        LASSERTF(MF_SOM_CHANGE == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_SOM_CHANGE);
 +               (unsigned)MF_SOM_CHANGE);
        LASSERTF(MF_EPOCH_OPEN == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_EPOCH_OPEN);
 +               (unsigned)MF_EPOCH_OPEN);
        LASSERTF(MF_EPOCH_CLOSE == 0x00000004UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_EPOCH_CLOSE);
 +               (unsigned)MF_EPOCH_CLOSE);
        LASSERTF(MF_MDC_CANCEL_FID1 == 0x00000008UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_MDC_CANCEL_FID1);
 +               (unsigned)MF_MDC_CANCEL_FID1);
        LASSERTF(MF_MDC_CANCEL_FID2 == 0x00000010UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_MDC_CANCEL_FID2);
 +               (unsigned)MF_MDC_CANCEL_FID2);
        LASSERTF(MF_MDC_CANCEL_FID3 == 0x00000020UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_MDC_CANCEL_FID3);
 +               (unsigned)MF_MDC_CANCEL_FID3);
        LASSERTF(MF_MDC_CANCEL_FID4 == 0x00000040UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_MDC_CANCEL_FID4);
 +               (unsigned)MF_MDC_CANCEL_FID4);
        LASSERTF(MF_SOM_AU == 0x00000080UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_SOM_AU);
 +               (unsigned)MF_SOM_AU);
        LASSERTF(MF_GETATTR_LOCK == 0x00000100UL, "found 0x%.8xUL\n",
 -              (unsigned)MF_GETATTR_LOCK);
 +               (unsigned)MF_GETATTR_LOCK);
        LASSERTF(MDS_ATTR_MODE == 0x0000000000000001ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_MODE);
 +               (long long)MDS_ATTR_MODE);
        LASSERTF(MDS_ATTR_UID == 0x0000000000000002ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_UID);
 +               (long long)MDS_ATTR_UID);
        LASSERTF(MDS_ATTR_GID == 0x0000000000000004ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_GID);
 +               (long long)MDS_ATTR_GID);
        LASSERTF(MDS_ATTR_SIZE == 0x0000000000000008ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_SIZE);
 +               (long long)MDS_ATTR_SIZE);
        LASSERTF(MDS_ATTR_ATIME == 0x0000000000000010ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_ATIME);
 +               (long long)MDS_ATTR_ATIME);
        LASSERTF(MDS_ATTR_MTIME == 0x0000000000000020ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_MTIME);
 +               (long long)MDS_ATTR_MTIME);
        LASSERTF(MDS_ATTR_CTIME == 0x0000000000000040ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_CTIME);
 +               (long long)MDS_ATTR_CTIME);
        LASSERTF(MDS_ATTR_ATIME_SET == 0x0000000000000080ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_ATIME_SET);
 +               (long long)MDS_ATTR_ATIME_SET);
        LASSERTF(MDS_ATTR_MTIME_SET == 0x0000000000000100ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_MTIME_SET);
 +               (long long)MDS_ATTR_MTIME_SET);
        LASSERTF(MDS_ATTR_FORCE == 0x0000000000000200ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_FORCE);
 +               (long long)MDS_ATTR_FORCE);
        LASSERTF(MDS_ATTR_ATTR_FLAG == 0x0000000000000400ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_ATTR_FLAG);
 +               (long long)MDS_ATTR_ATTR_FLAG);
        LASSERTF(MDS_ATTR_KILL_SUID == 0x0000000000000800ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_KILL_SUID);
 +               (long long)MDS_ATTR_KILL_SUID);
        LASSERTF(MDS_ATTR_KILL_SGID == 0x0000000000001000ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_KILL_SGID);
 +               (long long)MDS_ATTR_KILL_SGID);
        LASSERTF(MDS_ATTR_CTIME_SET == 0x0000000000002000ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_CTIME_SET);
 +               (long long)MDS_ATTR_CTIME_SET);
        LASSERTF(MDS_ATTR_FROM_OPEN == 0x0000000000004000ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_FROM_OPEN);
 +               (long long)MDS_ATTR_FROM_OPEN);
        LASSERTF(MDS_ATTR_BLOCKS == 0x0000000000008000ULL, "found 0x%.16llxULL\n",
 -                      (long long)MDS_ATTR_BLOCKS);
 +               (long long)MDS_ATTR_BLOCKS);
        LASSERTF(FLD_QUERY == 900, "found %lld\n",
                 (long long)FLD_QUERY);
        LASSERTF(FLD_FIRST_OPC == 900, "found %lld\n",
        LASSERTF((int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid) == 16, "found %lld\n",
                 (long long)(int)sizeof(((struct lustre_mdt_attrs *)0)->lma_self_fid));
        LASSERTF(LMAI_RELEASED == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)LMAI_RELEASED);
 +               (unsigned)LMAI_RELEASED);
        LASSERTF(LMAC_HSM == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)LMAC_HSM);
 +               (unsigned)LMAC_HSM);
        LASSERTF(LMAC_SOM == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)LMAC_SOM);
 +               (unsigned)LMAC_SOM);
        LASSERTF(LMAC_NOT_IN_OI == 0x00000004UL, "found 0x%.8xUL\n",
 -              (unsigned)LMAC_NOT_IN_OI);
 +               (unsigned)LMAC_NOT_IN_OI);
        LASSERTF(LMAC_FID_ON_OST == 0x00000008UL, "found 0x%.8xUL\n",
 -              (unsigned)LMAC_FID_ON_OST);
 +               (unsigned)LMAC_FID_ON_OST);
  
        /* Checks for struct ost_id */
        LASSERTF((int)sizeof(struct ost_id) == 16, "found %lld\n",
        LASSERTF(FID_SEQ_IGIF == 12, "found %lld\n",
                 (long long)FID_SEQ_IGIF);
        LASSERTF(FID_SEQ_IGIF_MAX == 0x00000000ffffffffULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_IGIF_MAX);
 +               (long long)FID_SEQ_IGIF_MAX);
        LASSERTF(FID_SEQ_IDIF == 0x0000000100000000ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_IDIF);
 +               (long long)FID_SEQ_IDIF);
        LASSERTF(FID_SEQ_IDIF_MAX == 0x00000001ffffffffULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_IDIF_MAX);
 +               (long long)FID_SEQ_IDIF_MAX);
        LASSERTF(FID_SEQ_START == 0x0000000200000000ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_START);
 +               (long long)FID_SEQ_START);
        LASSERTF(FID_SEQ_LOCAL_FILE == 0x0000000200000001ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_LOCAL_FILE);
 +               (long long)FID_SEQ_LOCAL_FILE);
        LASSERTF(FID_SEQ_DOT_LUSTRE == 0x0000000200000002ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_DOT_LUSTRE);
 +               (long long)FID_SEQ_DOT_LUSTRE);
        LASSERTF(FID_SEQ_SPECIAL == 0x0000000200000004ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_SPECIAL);
 +               (long long)FID_SEQ_SPECIAL);
        LASSERTF(FID_SEQ_QUOTA == 0x0000000200000005ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_QUOTA);
 +               (long long)FID_SEQ_QUOTA);
        LASSERTF(FID_SEQ_QUOTA_GLB == 0x0000000200000006ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_QUOTA_GLB);
 +               (long long)FID_SEQ_QUOTA_GLB);
        LASSERTF(FID_SEQ_ROOT == 0x0000000200000007ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_ROOT);
 +               (long long)FID_SEQ_ROOT);
        LASSERTF(FID_SEQ_NORMAL == 0x0000000200000400ULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_NORMAL);
 +               (long long)FID_SEQ_NORMAL);
        LASSERTF(FID_SEQ_LOV_DEFAULT == 0xffffffffffffffffULL, "found 0x%.16llxULL\n",
 -                      (long long)FID_SEQ_LOV_DEFAULT);
 +               (long long)FID_SEQ_LOV_DEFAULT);
        LASSERTF(FID_OID_SPECIAL_BFL == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)FID_OID_SPECIAL_BFL);
 +               (unsigned)FID_OID_SPECIAL_BFL);
        LASSERTF(FID_OID_DOT_LUSTRE == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)FID_OID_DOT_LUSTRE);
 +               (unsigned)FID_OID_DOT_LUSTRE);
        LASSERTF(FID_OID_DOT_LUSTRE_OBF == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)FID_OID_DOT_LUSTRE_OBF);
 +               (unsigned)FID_OID_DOT_LUSTRE_OBF);
  
        /* Checks for struct lu_dirent */
        LASSERTF((int)sizeof(struct lu_dirent) == 32, "found %lld\n",
        LASSERTF((int)sizeof(((struct lu_dirent *)0)->lde_name[0]) == 1, "found %lld\n",
                 (long long)(int)sizeof(((struct lu_dirent *)0)->lde_name[0]));
        LASSERTF(LUDA_FID == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)LUDA_FID);
 +               (unsigned)LUDA_FID);
        LASSERTF(LUDA_TYPE == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)LUDA_TYPE);
 +               (unsigned)LUDA_TYPE);
        LASSERTF(LUDA_64BITHASH == 0x00000004UL, "found 0x%.8xUL\n",
 -              (unsigned)LUDA_64BITHASH);
 +               (unsigned)LUDA_64BITHASH);
  
        /* Checks for struct luda_type */
        LASSERTF((int)sizeof(struct luda_type) == 2, "found %lld\n",
        LASSERTF((int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct lustre_msg_v2 *)0)->lm_buflens[0]));
        LASSERTF(LUSTRE_MSG_MAGIC_V2 == 0x0BD00BD3, "found 0x%.8x\n",
 -              LUSTRE_MSG_MAGIC_V2);
 +               LUSTRE_MSG_MAGIC_V2);
        LASSERTF(LUSTRE_MSG_MAGIC_V2_SWABBED == 0xD30BD00B, "found 0x%.8x\n",
 -              LUSTRE_MSG_MAGIC_V2_SWABBED);
 +               LUSTRE_MSG_MAGIC_V2_SWABBED);
  
        /* Checks for struct ptlrpc_body */
        LASSERTF((int)sizeof(struct ptlrpc_body_v3) == 184, "found %lld\n",
                 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_padding));
        LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding) == 32, "found %lld\n",
                 (long long)(int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_padding));
 -      CLASSERT(JOBSTATS_JOBID_SIZE == 32);
 +      CLASSERT(LUSTRE_JOBID_SIZE == 32);
        LASSERTF((int)offsetof(struct ptlrpc_body_v3, pb_jobid) == 152, "found %lld\n",
                 (long long)(int)offsetof(struct ptlrpc_body_v3, pb_jobid));
        LASSERTF((int)sizeof(((struct ptlrpc_body_v3 *)0)->pb_jobid) == 32, "found %lld\n",
        LASSERTF(MSG_PTLRPC_HEADER_OFF == 31, "found %lld\n",
                 (long long)MSG_PTLRPC_HEADER_OFF);
        LASSERTF(PTLRPC_MSG_VERSION == 0x00000003, "found 0x%.8x\n",
 -              PTLRPC_MSG_VERSION);
 +               PTLRPC_MSG_VERSION);
        LASSERTF(LUSTRE_VERSION_MASK == 0xffff0000, "found 0x%.8x\n",
 -              LUSTRE_VERSION_MASK);
 +               LUSTRE_VERSION_MASK);
        LASSERTF(LUSTRE_OBD_VERSION == 0x00010000, "found 0x%.8x\n",
 -              LUSTRE_OBD_VERSION);
 +               LUSTRE_OBD_VERSION);
        LASSERTF(LUSTRE_MDS_VERSION == 0x00020000, "found 0x%.8x\n",
 -              LUSTRE_MDS_VERSION);
 +               LUSTRE_MDS_VERSION);
        LASSERTF(LUSTRE_OST_VERSION == 0x00030000, "found 0x%.8x\n",
 -              LUSTRE_OST_VERSION);
 +               LUSTRE_OST_VERSION);
        LASSERTF(LUSTRE_DLM_VERSION == 0x00040000, "found 0x%.8x\n",
 -              LUSTRE_DLM_VERSION);
 +               LUSTRE_DLM_VERSION);
        LASSERTF(LUSTRE_LOG_VERSION == 0x00050000, "found 0x%.8x\n",
 -              LUSTRE_LOG_VERSION);
 +               LUSTRE_LOG_VERSION);
        LASSERTF(LUSTRE_MGS_VERSION == 0x00060000, "found 0x%.8x\n",
 -              LUSTRE_MGS_VERSION);
 +               LUSTRE_MGS_VERSION);
        LASSERTF(MSGHDR_AT_SUPPORT == 1, "found %lld\n",
                 (long long)MSGHDR_AT_SUPPORT);
        LASSERTF(MSGHDR_CKSUM_INCOMPAT18 == 2, "found %lld\n",
                 (long long)MSGHDR_CKSUM_INCOMPAT18);
        LASSERTF(MSG_OP_FLAG_MASK == 0xffff0000UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_OP_FLAG_MASK);
 +               (unsigned)MSG_OP_FLAG_MASK);
        LASSERTF(MSG_OP_FLAG_SHIFT == 16, "found %lld\n",
                 (long long)MSG_OP_FLAG_SHIFT);
        LASSERTF(MSG_GEN_FLAG_MASK == 0x0000ffffUL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_GEN_FLAG_MASK);
 +               (unsigned)MSG_GEN_FLAG_MASK);
        LASSERTF(MSG_LAST_REPLAY == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_LAST_REPLAY);
 +               (unsigned)MSG_LAST_REPLAY);
        LASSERTF(MSG_RESENT == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_RESENT);
 +               (unsigned)MSG_RESENT);
        LASSERTF(MSG_REPLAY == 0x00000004UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_REPLAY);
 +               (unsigned)MSG_REPLAY);
        LASSERTF(MSG_DELAY_REPLAY == 0x00000010UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_DELAY_REPLAY);
 +               (unsigned)MSG_DELAY_REPLAY);
        LASSERTF(MSG_VERSION_REPLAY == 0x00000020UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_VERSION_REPLAY);
 +               (unsigned)MSG_VERSION_REPLAY);
        LASSERTF(MSG_REQ_REPLAY_DONE == 0x00000040UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_REQ_REPLAY_DONE);
 +               (unsigned)MSG_REQ_REPLAY_DONE);
        LASSERTF(MSG_LOCK_REPLAY_DONE == 0x00000080UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_LOCK_REPLAY_DONE);
 +               (unsigned)MSG_LOCK_REPLAY_DONE);
        LASSERTF(MSG_CONNECT_RECOVERING == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_CONNECT_RECOVERING);
 +               (unsigned)MSG_CONNECT_RECOVERING);
        LASSERTF(MSG_CONNECT_RECONNECT == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_CONNECT_RECONNECT);
 +               (unsigned)MSG_CONNECT_RECONNECT);
        LASSERTF(MSG_CONNECT_REPLAYABLE == 0x00000004UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_CONNECT_REPLAYABLE);
 +               (unsigned)MSG_CONNECT_REPLAYABLE);
        LASSERTF(MSG_CONNECT_LIBCLIENT == 0x00000010UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_CONNECT_LIBCLIENT);
 +               (unsigned)MSG_CONNECT_LIBCLIENT);
        LASSERTF(MSG_CONNECT_INITIAL == 0x00000020UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_CONNECT_INITIAL);
 +               (unsigned)MSG_CONNECT_INITIAL);
        LASSERTF(MSG_CONNECT_ASYNC == 0x00000040UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_CONNECT_ASYNC);
 +               (unsigned)MSG_CONNECT_ASYNC);
        LASSERTF(MSG_CONNECT_NEXT_VER == 0x00000080UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_CONNECT_NEXT_VER);
 +               (unsigned)MSG_CONNECT_NEXT_VER);
        LASSERTF(MSG_CONNECT_TRANSNO == 0x00000100UL, "found 0x%.8xUL\n",
 -              (unsigned)MSG_CONNECT_TRANSNO);
 +               (unsigned)MSG_CONNECT_TRANSNO);
  
        /* Checks for struct obd_connect_data */
        LASSERTF((int)sizeof(struct obd_connect_data) == 192, "found %lld\n",
                 "found 0x%.16llxULL\n", OBD_CONNECT_FLOCK_DEAD);
        LASSERTF(OBD_CONNECT_OPEN_BY_FID == 0x20000000000000ULL,
                 "found 0x%.16llxULL\n", OBD_CONNECT_OPEN_BY_FID);
 +      LASSERTF(OBD_CONNECT_LFSCK == 0x40000000000000ULL, "found 0x%.16llxULL\n",
 +               OBD_CONNECT_LFSCK);
 +      LASSERTF(OBD_CONNECT_UNLINK_CLOSE == 0x100000000000000ULL, "found 0x%.16llxULL\n",
 +               OBD_CONNECT_UNLINK_CLOSE);
 +      LASSERTF(OBD_CONNECT_DIR_STRIPE == 0x400000000000000ULL, "found 0x%.16llxULL\n",
 +               OBD_CONNECT_DIR_STRIPE);
        LASSERTF(OBD_CKSUM_CRC32 == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)OBD_CKSUM_CRC32);
 +               (unsigned)OBD_CKSUM_CRC32);
        LASSERTF(OBD_CKSUM_ADLER == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)OBD_CKSUM_ADLER);
 +               (unsigned)OBD_CKSUM_ADLER);
        LASSERTF(OBD_CKSUM_CRC32C == 0x00000004UL, "found 0x%.8xUL\n",
 -              (unsigned)OBD_CKSUM_CRC32C);
 +               (unsigned)OBD_CKSUM_CRC32C);
  
        /* Checks for struct obdo */
        LASSERTF((int)sizeof(struct obdo) == 208, "found %lld\n",
                 (long long)(int)offsetof(struct lov_mds_md_v1, lmm_objects[0]));
        LASSERTF((int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]) == 24, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_mds_md_v1 *)0)->lmm_objects[0]));
 -      CLASSERT(LOV_MAGIC_V1 == 0x0BD10BD0);
 +      CLASSERT(LOV_MAGIC_V1 == (0x0BD10000 | 0x0BD0));
  
        /* Checks for struct lov_mds_md_v3 */
        LASSERTF((int)sizeof(struct lov_mds_md_v3) == 48, "found %lld\n",
                 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_layout_gen));
        LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen) == 2, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_layout_gen));
 -      CLASSERT(LOV_MAXPOOLNAME == 16);
 +      CLASSERT(LOV_MAXPOOLNAME == 15);
        LASSERTF((int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]) == 48, "found %lld\n",
                 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_pool_name[16]));
        LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_pool_name[16]) == 1, "found %lld\n",
                 (long long)(int)offsetof(struct lov_mds_md_v3, lmm_objects[0]));
        LASSERTF((int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]) == 24, "found %lld\n",
                 (long long)(int)sizeof(((struct lov_mds_md_v3 *)0)->lmm_objects[0]));
 -      CLASSERT(LOV_MAGIC_V3 == 0x0BD30BD0);
 +      CLASSERT(LOV_MAGIC_V3 == (0x0BD30000 | 0x0BD0));
        LASSERTF(LOV_PATTERN_RAID0 == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)LOV_PATTERN_RAID0);
 +               (unsigned)LOV_PATTERN_RAID0);
        LASSERTF(LOV_PATTERN_RAID1 == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)LOV_PATTERN_RAID1);
 +               (unsigned)LOV_PATTERN_RAID1);
        LASSERTF(LOV_PATTERN_FIRST == 0x00000100UL, "found 0x%.8xUL\n",
 -              (unsigned)LOV_PATTERN_FIRST);
 +               (unsigned)LOV_PATTERN_FIRST);
        LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
 -              (unsigned)LOV_PATTERN_CMOBD);
 +               (unsigned)LOV_PATTERN_CMOBD);
 +
 +      /* Checks for struct lmv_mds_md_v1 */
 +      LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
 +               (long long)(int)sizeof(struct lmv_mds_md_v1));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_magic) == 0, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_magic));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_magic));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count) == 4, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_count));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_count));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index) == 8, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_master_mdt_index));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_master_mdt_index));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_hash_type) == 12, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_hash_type));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_hash_type));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_layout_version) == 16, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_layout_version));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_layout_version));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding1) == 20, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding1));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding1));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding2) == 24, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding2));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding2));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_padding3) == 32, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_padding3));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_padding3));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]) == 56, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_pool_name[16]));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]) == 1, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_pool_name[16]));
 +      LASSERTF((int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]) == 56, "found %lld\n",
 +               (long long)(int)offsetof(struct lmv_mds_md_v1, lmv_stripe_fids[0]));
 +      LASSERTF((int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]) == 16, "found %lld\n",
 +               (long long)(int)sizeof(((struct lmv_mds_md_v1 *)0)->lmv_stripe_fids[0]));
 +      CLASSERT(LMV_MAGIC_V1 == 0x0CD20CD0);
 +      CLASSERT(LMV_MAGIC_STRIPE == 0x0CD40CD0);
 +      CLASSERT(LMV_HASH_TYPE_MASK == 0x0000ffff);
 +      CLASSERT(LMV_HASH_FLAG_MIGRATION == 0x80000000);
 +      CLASSERT(LMV_HASH_FLAG_DEAD == 0x40000000);
  
        /* Checks for struct obd_statfs */
        LASSERTF((int)sizeof(struct obd_statfs) == 144, "found %lld\n",
        LASSERTF((int)sizeof(((struct obd_dqblk *)0)->dqb_padding) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct obd_dqblk *)0)->dqb_padding));
        LASSERTF(Q_QUOTACHECK == 0x800100, "found 0x%.8x\n",
 -              Q_QUOTACHECK);
 +               Q_QUOTACHECK);
        LASSERTF(Q_INITQUOTA == 0x800101, "found 0x%.8x\n",
 -              Q_INITQUOTA);
 +               Q_INITQUOTA);
        LASSERTF(Q_GETOINFO == 0x800102, "found 0x%.8x\n",
 -              Q_GETOINFO);
 +               Q_GETOINFO);
        LASSERTF(Q_GETOQUOTA == 0x800103, "found 0x%.8x\n",
 -              Q_GETOQUOTA);
 +               Q_GETOQUOTA);
        LASSERTF(Q_FINVALIDATE == 0x800104, "found 0x%.8x\n",
 -              Q_FINVALIDATE);
 +               Q_FINVALIDATE);
  
        /* Checks for struct niobuf_remote */
        LASSERTF((int)sizeof(struct niobuf_remote) == 16, "found %lld\n",
                 (long long)(int)sizeof(struct niobuf_remote));
 -      LASSERTF((int)offsetof(struct niobuf_remote, offset) == 0, "found %lld\n",
 -               (long long)(int)offsetof(struct niobuf_remote, offset));
 -      LASSERTF((int)sizeof(((struct niobuf_remote *)0)->offset) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct niobuf_remote *)0)->offset));
 -      LASSERTF((int)offsetof(struct niobuf_remote, len) == 8, "found %lld\n",
 -               (long long)(int)offsetof(struct niobuf_remote, len));
 -      LASSERTF((int)sizeof(((struct niobuf_remote *)0)->len) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct niobuf_remote *)0)->len));
 -      LASSERTF((int)offsetof(struct niobuf_remote, flags) == 12, "found %lld\n",
 -               (long long)(int)offsetof(struct niobuf_remote, flags));
 -      LASSERTF((int)sizeof(((struct niobuf_remote *)0)->flags) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct niobuf_remote *)0)->flags));
 +      LASSERTF((int)offsetof(struct niobuf_remote, rnb_offset) == 0, "found %lld\n",
 +               (long long)(int)offsetof(struct niobuf_remote, rnb_offset));
 +      LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_offset) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_offset));
 +      LASSERTF((int)offsetof(struct niobuf_remote, rnb_len) == 8, "found %lld\n",
 +               (long long)(int)offsetof(struct niobuf_remote, rnb_len));
 +      LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_len) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_len));
 +      LASSERTF((int)offsetof(struct niobuf_remote, rnb_flags) == 12, "found %lld\n",
 +               (long long)(int)offsetof(struct niobuf_remote, rnb_flags));
 +      LASSERTF((int)sizeof(((struct niobuf_remote *)0)->rnb_flags) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct niobuf_remote *)0)->rnb_flags));
        LASSERTF(OBD_BRW_READ == 0x01, "found 0x%.8x\n",
 -              OBD_BRW_READ);
 +               OBD_BRW_READ);
        LASSERTF(OBD_BRW_WRITE == 0x02, "found 0x%.8x\n",
 -              OBD_BRW_WRITE);
 +               OBD_BRW_WRITE);
        LASSERTF(OBD_BRW_SYNC == 0x08, "found 0x%.8x\n",
 -              OBD_BRW_SYNC);
 +               OBD_BRW_SYNC);
        LASSERTF(OBD_BRW_CHECK == 0x10, "found 0x%.8x\n",
 -              OBD_BRW_CHECK);
 +               OBD_BRW_CHECK);
        LASSERTF(OBD_BRW_FROM_GRANT == 0x20, "found 0x%.8x\n",
 -              OBD_BRW_FROM_GRANT);
 +               OBD_BRW_FROM_GRANT);
        LASSERTF(OBD_BRW_GRANTED == 0x40, "found 0x%.8x\n",
 -              OBD_BRW_GRANTED);
 +               OBD_BRW_GRANTED);
        LASSERTF(OBD_BRW_NOCACHE == 0x80, "found 0x%.8x\n",
 -              OBD_BRW_NOCACHE);
 +               OBD_BRW_NOCACHE);
        LASSERTF(OBD_BRW_NOQUOTA == 0x100, "found 0x%.8x\n",
 -              OBD_BRW_NOQUOTA);
 +               OBD_BRW_NOQUOTA);
        LASSERTF(OBD_BRW_SRVLOCK == 0x200, "found 0x%.8x\n",
 -              OBD_BRW_SRVLOCK);
 +               OBD_BRW_SRVLOCK);
        LASSERTF(OBD_BRW_ASYNC == 0x400, "found 0x%.8x\n",
 -              OBD_BRW_ASYNC);
 +               OBD_BRW_ASYNC);
        LASSERTF(OBD_BRW_MEMALLOC == 0x800, "found 0x%.8x\n",
 -              OBD_BRW_MEMALLOC);
 +               OBD_BRW_MEMALLOC);
        LASSERTF(OBD_BRW_OVER_USRQUOTA == 0x1000, "found 0x%.8x\n",
                 OBD_BRW_OVER_USRQUOTA);
        LASSERTF(OBD_BRW_OVER_GRPQUOTA == 0x2000, "found 0x%.8x\n",
        /* Checks for struct mdt_body */
        LASSERTF((int)sizeof(struct mdt_body) == 216, "found %lld\n",
                 (long long)(int)sizeof(struct mdt_body));
 -      LASSERTF((int)offsetof(struct mdt_body, fid1) == 0, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, fid1));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->fid1) == 16, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->fid1));
 -      LASSERTF((int)offsetof(struct mdt_body, fid2) == 16, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, fid2));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->fid2) == 16, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->fid2));
 -      LASSERTF((int)offsetof(struct mdt_body, handle) == 32, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, handle));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->handle) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->handle));
 -      LASSERTF((int)offsetof(struct mdt_body, valid) == 40, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, valid));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->valid) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->valid));
 -      LASSERTF((int)offsetof(struct mdt_body, size) == 48, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, size));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->size) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->size));
 -      LASSERTF((int)offsetof(struct mdt_body, mtime) == 56, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, mtime));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->mtime) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->mtime));
 -      LASSERTF((int)offsetof(struct mdt_body, atime) == 64, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, atime));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->atime) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->atime));
 -      LASSERTF((int)offsetof(struct mdt_body, ctime) == 72, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, ctime));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->ctime) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->ctime));
 -      LASSERTF((int)offsetof(struct mdt_body, blocks) == 80, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, blocks));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->blocks) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->blocks));
 -      LASSERTF((int)offsetof(struct mdt_body, t_state) == 96, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, t_state));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->t_state) == 8,
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_fid1) == 0, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_fid1));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid1) == 16, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid1));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_fid2) == 16, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_fid2));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fid2) == 16, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fid2));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_handle) == 32, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_handle));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_handle) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_handle));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_valid) == 40, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_valid));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_valid) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_valid));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_size) == 48, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_size));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_size) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_size));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_mtime) == 56, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_mtime));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mtime) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mtime));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_atime) == 64, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_atime));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_atime) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_atime));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_ctime) == 72, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_ctime));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_ctime) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_ctime));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_blocks) == 80, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_blocks));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_blocks) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_blocks));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_t_state) == 96, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_t_state));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_t_state) == 8,
                 "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->t_state));
 -      LASSERTF((int)offsetof(struct mdt_body, fsuid) == 104, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, fsuid));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->fsuid) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->fsuid));
 -      LASSERTF((int)offsetof(struct mdt_body, fsgid) == 108, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, fsgid));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->fsgid) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->fsgid));
 -      LASSERTF((int)offsetof(struct mdt_body, capability) == 112, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, capability));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->capability) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->capability));
 -      LASSERTF((int)offsetof(struct mdt_body, mode) == 116, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, mode));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->mode) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->mode));
 -      LASSERTF((int)offsetof(struct mdt_body, uid) == 120, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, uid));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->uid) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->uid));
 -      LASSERTF((int)offsetof(struct mdt_body, gid) == 124, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, gid));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->gid) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->gid));
 -      LASSERTF((int)offsetof(struct mdt_body, flags) == 128, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, flags));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->flags) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->flags));
 -      LASSERTF((int)offsetof(struct mdt_body, rdev) == 132, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, rdev));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->rdev) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->rdev));
 -      LASSERTF((int)offsetof(struct mdt_body, nlink) == 136, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, nlink));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->nlink) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->nlink));
 -      LASSERTF((int)offsetof(struct mdt_body, unused2) == 140, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, unused2));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->unused2) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->unused2));
 -      LASSERTF((int)offsetof(struct mdt_body, suppgid) == 144, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, suppgid));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->suppgid) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->suppgid));
 -      LASSERTF((int)offsetof(struct mdt_body, eadatasize) == 148, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, eadatasize));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->eadatasize) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->eadatasize));
 -      LASSERTF((int)offsetof(struct mdt_body, aclsize) == 152, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, aclsize));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->aclsize) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->aclsize));
 -      LASSERTF((int)offsetof(struct mdt_body, max_mdsize) == 156, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, max_mdsize));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->max_mdsize) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->max_mdsize));
 -      LASSERTF((int)offsetof(struct mdt_body, max_cookiesize) == 160, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, max_cookiesize));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->max_cookiesize) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->max_cookiesize));
 -      LASSERTF((int)offsetof(struct mdt_body, uid_h) == 164, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, uid_h));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->uid_h) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->uid_h));
 -      LASSERTF((int)offsetof(struct mdt_body, gid_h) == 168, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, gid_h));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->gid_h) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->gid_h));
 -      LASSERTF((int)offsetof(struct mdt_body, padding_5) == 172, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, padding_5));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_5) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->padding_5));
 -      LASSERTF((int)offsetof(struct mdt_body, padding_6) == 176, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, padding_6));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_6) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->padding_6));
 -      LASSERTF((int)offsetof(struct mdt_body, padding_7) == 184, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, padding_7));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_7) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->padding_7));
 -      LASSERTF((int)offsetof(struct mdt_body, padding_8) == 192, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, padding_8));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_8) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->padding_8));
 -      LASSERTF((int)offsetof(struct mdt_body, padding_9) == 200, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, padding_9));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_9) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->padding_9));
 -      LASSERTF((int)offsetof(struct mdt_body, padding_10) == 208, "found %lld\n",
 -               (long long)(int)offsetof(struct mdt_body, padding_10));
 -      LASSERTF((int)sizeof(((struct mdt_body *)0)->padding_10) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct mdt_body *)0)->padding_10));
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_t_state));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_fsuid) == 104, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_fsuid));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsuid) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsuid));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_fsgid) == 108, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_fsgid));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_fsgid) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_fsgid));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_capability) == 112, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_capability));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_capability) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_capability));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_mode) == 116, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_mode));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_mode) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_mode));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_uid) == 120, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_uid));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_gid) == 124, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_gid));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_flags) == 128, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_flags));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_flags) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_flags));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_rdev) == 132, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_rdev));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_rdev) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_rdev));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_nlink) == 136, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_nlink));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_nlink) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_nlink));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_unused2) == 140, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_unused2));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_unused2) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_unused2));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_suppgid) == 144, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_suppgid));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_suppgid) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_suppgid));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_eadatasize) == 148, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_eadatasize));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_eadatasize) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_eadatasize));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_aclsize) == 152, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_aclsize));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_aclsize) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_aclsize));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_max_mdsize) == 156, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_max_mdsize));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_mdsize));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_max_cookiesize) == 160, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_max_cookiesize));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_max_cookiesize));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_uid_h) == 164, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_uid_h));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_uid_h) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_uid_h));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_gid_h) == 168, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_gid_h));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_gid_h) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_gid_h));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_padding_5) == 172, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_padding_5));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_5) == 4, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_5));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_padding_6) == 176, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_padding_6));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_6) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_6));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_padding_7) == 184, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_padding_7));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_7) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_7));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_padding_8) == 192, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_padding_8));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_8) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_8));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_padding_9) == 200, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_padding_9));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_9) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_9));
 +      LASSERTF((int)offsetof(struct mdt_body, mbo_padding_10) == 208, "found %lld\n",
 +               (long long)(int)offsetof(struct mdt_body, mbo_padding_10));
 +      LASSERTF((int)sizeof(((struct mdt_body *)0)->mbo_padding_10) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct mdt_body *)0)->mbo_padding_10));
        LASSERTF(MDS_FMODE_CLOSED == 000000000000UL, "found 0%.11oUL\n",
 -              MDS_FMODE_CLOSED);
 +               MDS_FMODE_CLOSED);
        LASSERTF(MDS_FMODE_EXEC == 000000000004UL, "found 0%.11oUL\n",
 -              MDS_FMODE_EXEC);
 +               MDS_FMODE_EXEC);
        LASSERTF(MDS_FMODE_EPOCH == 000001000000UL, "found 0%.11oUL\n",
 -              MDS_FMODE_EPOCH);
 +               MDS_FMODE_EPOCH);
        LASSERTF(MDS_FMODE_TRUNC == 000002000000UL, "found 0%.11oUL\n",
 -              MDS_FMODE_TRUNC);
 +               MDS_FMODE_TRUNC);
        LASSERTF(MDS_FMODE_SOM == 000004000000UL, "found 0%.11oUL\n",
 -              MDS_FMODE_SOM);
 +               MDS_FMODE_SOM);
        LASSERTF(MDS_OPEN_CREATED == 000000000010UL, "found 0%.11oUL\n",
 -              MDS_OPEN_CREATED);
 +               MDS_OPEN_CREATED);
        LASSERTF(MDS_OPEN_CROSS == 000000000020UL, "found 0%.11oUL\n",
 -              MDS_OPEN_CROSS);
 +               MDS_OPEN_CROSS);
        LASSERTF(MDS_OPEN_CREAT == 000000000100UL, "found 0%.11oUL\n",
 -              MDS_OPEN_CREAT);
 +               MDS_OPEN_CREAT);
        LASSERTF(MDS_OPEN_EXCL == 000000000200UL, "found 0%.11oUL\n",
 -              MDS_OPEN_EXCL);
 +               MDS_OPEN_EXCL);
        LASSERTF(MDS_OPEN_TRUNC == 000000001000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_TRUNC);
 +               MDS_OPEN_TRUNC);
        LASSERTF(MDS_OPEN_APPEND == 000000002000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_APPEND);
 +               MDS_OPEN_APPEND);
        LASSERTF(MDS_OPEN_SYNC == 000000010000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_SYNC);
 +               MDS_OPEN_SYNC);
        LASSERTF(MDS_OPEN_DIRECTORY == 000000200000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_DIRECTORY);
 +               MDS_OPEN_DIRECTORY);
        LASSERTF(MDS_OPEN_BY_FID == 000040000000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_BY_FID);
 +               MDS_OPEN_BY_FID);
        LASSERTF(MDS_OPEN_DELAY_CREATE == 000100000000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_DELAY_CREATE);
 +               MDS_OPEN_DELAY_CREATE);
        LASSERTF(MDS_OPEN_OWNEROVERRIDE == 000200000000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_OWNEROVERRIDE);
 +               MDS_OPEN_OWNEROVERRIDE);
        LASSERTF(MDS_OPEN_JOIN_FILE == 000400000000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_JOIN_FILE);
 +               MDS_OPEN_JOIN_FILE);
        LASSERTF(MDS_OPEN_LOCK == 004000000000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_LOCK);
 +               MDS_OPEN_LOCK);
        LASSERTF(MDS_OPEN_HAS_EA == 010000000000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_HAS_EA);
 +               MDS_OPEN_HAS_EA);
        LASSERTF(MDS_OPEN_HAS_OBJS == 020000000000UL, "found 0%.11oUL\n",
 -              MDS_OPEN_HAS_OBJS);
 +               MDS_OPEN_HAS_OBJS);
        LASSERTF(MDS_OPEN_NORESTORE == 00000000000100000000000ULL, "found 0%.22lloULL\n",
 -                      (long long)MDS_OPEN_NORESTORE);
 +               (long long)MDS_OPEN_NORESTORE);
        LASSERTF(MDS_OPEN_NEWSTRIPE == 00000000000200000000000ULL, "found 0%.22lloULL\n",
 -                      (long long)MDS_OPEN_NEWSTRIPE);
 +               (long long)MDS_OPEN_NEWSTRIPE);
        LASSERTF(MDS_OPEN_VOLATILE == 00000000000400000000000ULL, "found 0%.22lloULL\n",
 -                      (long long)MDS_OPEN_VOLATILE);
 +               (long long)MDS_OPEN_VOLATILE);
        LASSERTF(LUSTRE_SYNC_FL == 0x00000008, "found 0x%.8x\n",
 -              LUSTRE_SYNC_FL);
 +               LUSTRE_SYNC_FL);
        LASSERTF(LUSTRE_IMMUTABLE_FL == 0x00000010, "found 0x%.8x\n",
 -              LUSTRE_IMMUTABLE_FL);
 +               LUSTRE_IMMUTABLE_FL);
        LASSERTF(LUSTRE_APPEND_FL == 0x00000020, "found 0x%.8x\n",
 -              LUSTRE_APPEND_FL);
 +               LUSTRE_APPEND_FL);
        LASSERTF(LUSTRE_NOATIME_FL == 0x00000080, "found 0x%.8x\n",
 -              LUSTRE_NOATIME_FL);
 +               LUSTRE_NOATIME_FL);
        LASSERTF(LUSTRE_DIRSYNC_FL == 0x00010000, "found 0x%.8x\n",
 -              LUSTRE_DIRSYNC_FL);
 +               LUSTRE_DIRSYNC_FL);
        LASSERTF(MDS_INODELOCK_LOOKUP == 0x000001, "found 0x%.8x\n",
 -              MDS_INODELOCK_LOOKUP);
 +               MDS_INODELOCK_LOOKUP);
        LASSERTF(MDS_INODELOCK_UPDATE == 0x000002, "found 0x%.8x\n",
 -              MDS_INODELOCK_UPDATE);
 +               MDS_INODELOCK_UPDATE);
        LASSERTF(MDS_INODELOCK_OPEN == 0x000004, "found 0x%.8x\n",
 -              MDS_INODELOCK_OPEN);
 +               MDS_INODELOCK_OPEN);
        LASSERTF(MDS_INODELOCK_LAYOUT == 0x000008, "found 0x%.8x\n",
 -              MDS_INODELOCK_LAYOUT);
 +               MDS_INODELOCK_LAYOUT);
  
        /* Checks for struct mdt_ioepoch */
        LASSERTF((int)sizeof(struct mdt_ioepoch) == 24, "found %lld\n",
        LASSERTF((int)sizeof(((struct lmv_desc *)0)->ld_uuid) == 40, "found %lld\n",
                 (long long)(int)sizeof(((struct lmv_desc *)0)->ld_uuid));
  
 -      /* Checks for struct lmv_stripe_md */
 -      LASSERTF((int)sizeof(struct lmv_stripe_md) == 32, "found %lld\n",
 -               (long long)(int)sizeof(struct lmv_stripe_md));
 -      LASSERTF((int)offsetof(struct lmv_stripe_md, mea_magic) == 0, "found %lld\n",
 -               (long long)(int)offsetof(struct lmv_stripe_md, mea_magic));
 -      LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_magic) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_magic));
 -      LASSERTF((int)offsetof(struct lmv_stripe_md, mea_count) == 4, "found %lld\n",
 -               (long long)(int)offsetof(struct lmv_stripe_md, mea_count));
 -      LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_count) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_count));
 -      LASSERTF((int)offsetof(struct lmv_stripe_md, mea_master) == 8, "found %lld\n",
 -               (long long)(int)offsetof(struct lmv_stripe_md, mea_master));
 -      LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_master) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_master));
 -      LASSERTF((int)offsetof(struct lmv_stripe_md, mea_padding) == 12, "found %lld\n",
 -               (long long)(int)offsetof(struct lmv_stripe_md, mea_padding));
 -      LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_padding) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_padding));
 -      CLASSERT(LOV_MAXPOOLNAME == 16);
 -      LASSERTF((int)offsetof(struct lmv_stripe_md, mea_pool_name[16]) == 32, "found %lld\n",
 -               (long long)(int)offsetof(struct lmv_stripe_md, mea_pool_name[16]));
 -      LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]) == 1, "found %lld\n",
 -               (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_pool_name[16]));
 -      LASSERTF((int)offsetof(struct lmv_stripe_md, mea_ids[0]) == 32, "found %lld\n",
 -               (long long)(int)offsetof(struct lmv_stripe_md, mea_ids[0]));
 -      LASSERTF((int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]) == 16, "found %lld\n",
 -               (long long)(int)sizeof(((struct lmv_stripe_md *)0)->mea_ids[0]));
 -
        /* Checks for struct lov_desc */
        LASSERTF((int)sizeof(struct lov_desc) == 88, "found %lld\n",
                 (long long)(int)sizeof(struct lov_desc));
                 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
        LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
 -      LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n",
 -               (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding));
 -      LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding));
 +      LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_valid) == 48, "found %lld\n",
 +               (long long)(int)offsetof(struct llog_setattr64_rec, lsr_valid));
 +      LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid));
        LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
                 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
        LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",
        LASSERTF((int)sizeof(((struct changelog_rec *)0)->cr_pfid) == 16, "found %lld\n",
                 (long long)(int)sizeof(((struct changelog_rec *)0)->cr_pfid));
  
 -      /* Checks for struct changelog_ext_rec */
 -      LASSERTF((int)sizeof(struct changelog_ext_rec) == 96, "found %lld\n",
 -               (long long)(int)sizeof(struct changelog_ext_rec));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_namelen) == 0, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_namelen));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen) == 2, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_namelen));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_flags) == 2, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_flags));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_flags) == 2, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_flags));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_type) == 4, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_type));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_type) == 4, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_type));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_index) == 8, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_index));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_index) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_index));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_prev) == 16, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_prev));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_prev) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_prev));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_time) == 24, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_time));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_time) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_time));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_tfid) == 32, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_tfid));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid) == 16, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_tfid));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_pfid) == 48, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_pfid));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid) == 16, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_pfid));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_sfid) == 64, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_sfid));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid) == 16, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_sfid));
 -      LASSERTF((int)offsetof(struct changelog_ext_rec, cr_spfid) == 80, "found %lld\n",
 -               (long long)(int)offsetof(struct changelog_ext_rec, cr_spfid));
 -      LASSERTF((int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid) == 16, "found %lld\n",
 -               (long long)(int)sizeof(((struct changelog_ext_rec *)0)->cr_spfid));
 -
        /* Checks for struct changelog_setinfo */
        LASSERTF((int)sizeof(struct changelog_setinfo) == 12, "found %lld\n",
                 (long long)(int)sizeof(struct changelog_setinfo));
                 (long long)(int)offsetof(struct llog_changelog_rec, cr));
        LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr) == 64, "found %lld\n",
                 (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr));
 -      LASSERTF((int)offsetof(struct llog_changelog_rec, cr_tail) == 80, "found %lld\n",
 -               (long long)(int)offsetof(struct llog_changelog_rec, cr_tail));
 -      LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_tail) == 8, "found %lld\n",
 -               (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_tail));
 +      LASSERTF((int)offsetof(struct llog_changelog_rec, cr_do_not_use) == 80, "found %lld\n",
 +               (long long)(int)offsetof(struct llog_changelog_rec, cr_do_not_use));
 +      LASSERTF((int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use) == 8, "found %lld\n",
 +               (long long)(int)sizeof(((struct llog_changelog_rec *)0)->cr_do_not_use));
  
        /* Checks for struct llog_changelog_user_rec */
        LASSERTF((int)sizeof(struct llog_changelog_user_rec) == 40, "found %lld\n",
        CLASSERT(LLOG_ORIGIN_HANDLE_DESTROY == 509);
        CLASSERT(LLOG_FIRST_OPC == 501);
        CLASSERT(LLOG_LAST_OPC == 510);
 +      CLASSERT(LLOG_CONFIG_ORIG_CTXT == 0);
 +      CLASSERT(LLOG_CONFIG_REPL_CTXT == 1);
 +      CLASSERT(LLOG_MDS_OST_ORIG_CTXT == 2);
 +      CLASSERT(LLOG_MDS_OST_REPL_CTXT == 3);
 +      CLASSERT(LLOG_SIZE_ORIG_CTXT == 4);
 +      CLASSERT(LLOG_SIZE_REPL_CTXT == 5);
 +      CLASSERT(LLOG_TEST_ORIG_CTXT == 8);
 +      CLASSERT(LLOG_TEST_REPL_CTXT == 9);
 +      CLASSERT(LLOG_CHANGELOG_ORIG_CTXT == 12);
 +      CLASSERT(LLOG_CHANGELOG_REPL_CTXT == 13);
 +      CLASSERT(LLOG_CHANGELOG_USER_ORIG_CTXT == 14);
 +      CLASSERT(LLOG_AGENT_ORIG_CTXT == 15);
 +      CLASSERT(LLOG_MAX_CTXTS == 16);
  
        /* Checks for struct llogd_conn_body */
        LASSERTF((int)sizeof(struct llogd_conn_body) == 40, "found %lld\n",
        CLASSERT(FIEMAP_EXTENT_NET == 0x80000000);
  
        /* Checks for type posix_acl_xattr_entry */
-       LASSERTF((int)sizeof(posix_acl_xattr_entry) == 8, "found %lld\n",
-                (long long)(int)sizeof(posix_acl_xattr_entry));
-       LASSERTF((int)offsetof(posix_acl_xattr_entry, e_tag) == 0, "found %lld\n",
-                (long long)(int)offsetof(posix_acl_xattr_entry, e_tag));
-       LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_tag) == 2, "found %lld\n",
-                (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_tag));
-       LASSERTF((int)offsetof(posix_acl_xattr_entry, e_perm) == 2, "found %lld\n",
-                (long long)(int)offsetof(posix_acl_xattr_entry, e_perm));
-       LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_perm) == 2, "found %lld\n",
-                (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_perm));
-       LASSERTF((int)offsetof(posix_acl_xattr_entry, e_id) == 4, "found %lld\n",
-                (long long)(int)offsetof(posix_acl_xattr_entry, e_id));
-       LASSERTF((int)sizeof(((posix_acl_xattr_entry *)0)->e_id) == 4, "found %lld\n",
-                (long long)(int)sizeof(((posix_acl_xattr_entry *)0)->e_id));
+       LASSERTF((int)sizeof(struct posix_acl_xattr_entry) == 8, "found %lld\n",
+                (long long)(int)sizeof(struct posix_acl_xattr_entry));
+       LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_tag) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct posix_acl_xattr_entry, e_tag));
+       LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_tag) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_tag));
+       LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_perm) == 2, "found %lld\n",
+                (long long)(int)offsetof(struct posix_acl_xattr_entry, e_perm));
+       LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_perm) == 2, "found %lld\n",
+                (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_perm));
+       LASSERTF((int)offsetof(struct posix_acl_xattr_entry, e_id) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct posix_acl_xattr_entry, e_id));
+       LASSERTF((int)sizeof(((struct posix_acl_xattr_entry *)0)->e_id) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct posix_acl_xattr_entry *)0)->e_id));
  
        /* Checks for type posix_acl_xattr_header */
-       LASSERTF((int)sizeof(posix_acl_xattr_header) == 4, "found %lld\n",
-                (long long)(int)sizeof(posix_acl_xattr_header));
-       LASSERTF((int)offsetof(posix_acl_xattr_header, a_version) == 0, "found %lld\n",
-                (long long)(int)offsetof(posix_acl_xattr_header, a_version));
-       LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_version) == 4, "found %lld\n",
-                (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_version));
-       LASSERTF((int)offsetof(posix_acl_xattr_header, a_entries) == 4, "found %lld\n",
-                (long long)(int)offsetof(posix_acl_xattr_header, a_entries));
-       LASSERTF((int)sizeof(((posix_acl_xattr_header *)0)->a_entries) == 0, "found %lld\n",
-                (long long)(int)sizeof(((posix_acl_xattr_header *)0)->a_entries));
+       LASSERTF((int)sizeof(struct posix_acl_xattr_header) == 4, "found %lld\n",
+                (long long)(int)sizeof(struct posix_acl_xattr_header));
+       LASSERTF((int)offsetof(struct posix_acl_xattr_header, a_version) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct posix_acl_xattr_header, a_version));
+       LASSERTF((int)sizeof(((struct posix_acl_xattr_header *)0)->a_version) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct posix_acl_xattr_header *)0)->a_version));
  
        /* Checks for struct link_ea_header */
        LASSERTF((int)sizeof(struct link_ea_header) == 24, "found %lld\n",
        LASSERTF((int)sizeof(((struct hsm_progress *)0)->padding) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct hsm_progress *)0)->padding));
        LASSERTF(HP_FLAG_COMPLETED == 0x01, "found 0x%.8x\n",
 -              HP_FLAG_COMPLETED);
 +               HP_FLAG_COMPLETED);
        LASSERTF(HP_FLAG_RETRY == 0x02, "found 0x%.8x\n",
 -              HP_FLAG_RETRY);
 +               HP_FLAG_RETRY);
  
        LASSERTF((int)offsetof(struct hsm_copy, hc_data_version) == 0, "found %lld\n",
                 (long long)(int)offsetof(struct hsm_copy, hc_data_version));
        LASSERTF((int)sizeof(((struct hsm_request *)0)->hr_data_len) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct hsm_request *)0)->hr_data_len));
        LASSERTF(HSM_FORCE_ACTION == 0x00000001UL, "found 0x%.8xUL\n",
 -              (unsigned)HSM_FORCE_ACTION);
 +               (unsigned)HSM_FORCE_ACTION);
        LASSERTF(HSM_GHOST_COPY == 0x00000002UL, "found 0x%.8xUL\n",
 -              (unsigned)HSM_GHOST_COPY);
 +               (unsigned)HSM_GHOST_COPY);
  
        /* Checks for struct hsm_user_request */
        LASSERTF((int)sizeof(struct hsm_user_request) == 24, "found %lld\n",
diff --combined fs/btrfs/inode.c
@@@ -5072,7 -5072,7 +5072,7 @@@ static int btrfs_setattr(struct dentry 
        if (btrfs_root_readonly(root))
                return -EROFS;
  
-       err = inode_change_ok(inode, attr);
+       err = setattr_prepare(dentry, attr);
        if (err)
                return err;
  
@@@ -8412,7 -8412,7 +8412,7 @@@ static int btrfs_submit_direct_hook(str
        if (!bio)
                return -ENOMEM;
  
 -      bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
 +      bio_set_op_attrs(bio, bio_op(orig_bio), bio_flags(orig_bio));
        bio->bi_private = dip;
        bio->bi_end_io = btrfs_end_dio_bio;
        btrfs_io_bio(bio)->logical = file_offset;
@@@ -8450,8 -8450,7 +8450,8 @@@ next_block
                                                  start_sector, GFP_NOFS);
                        if (!bio)
                                goto out_err;
 -                      bio_set_op_attrs(bio, bio_op(orig_bio), orig_bio->bi_opf);
 +                      bio_set_op_attrs(bio, bio_op(orig_bio),
 +                                       bio_flags(orig_bio));
                        bio->bi_private = dip;
                        bio->bi_end_io = btrfs_end_dio_bio;
                        btrfs_io_bio(bio)->logical = file_offset;
@@@ -10544,21 -10543,6 +10544,6 @@@ out_inode
  
  }
  
- /* Inspired by filemap_check_errors() */
- int btrfs_inode_check_errors(struct inode *inode)
- {
-       int ret = 0;
-       if (test_bit(AS_ENOSPC, &inode->i_mapping->flags) &&
-           test_and_clear_bit(AS_ENOSPC, &inode->i_mapping->flags))
-               ret = -ENOSPC;
-       if (test_bit(AS_EIO, &inode->i_mapping->flags) &&
-           test_and_clear_bit(AS_EIO, &inode->i_mapping->flags))
-               ret = -EIO;
-       return ret;
- }
  static const struct inode_operations btrfs_dir_inode_operations = {
        .getattr        = btrfs_getattr,
        .lookup         = btrfs_lookup,
diff --combined fs/ext2/inode.c
@@@ -32,7 -32,6 +32,7 @@@
  #include <linux/buffer_head.h>
  #include <linux/mpage.h>
  #include <linux/fiemap.h>
 +#include <linux/iomap.h>
  #include <linux/namei.h>
  #include <linux/uio.h>
  #include "ext2.h"
@@@ -619,7 -618,7 +619,7 @@@ static void ext2_splice_branch(struct i
   */
  static int ext2_get_blocks(struct inode *inode,
                           sector_t iblock, unsigned long maxblocks,
 -                         struct buffer_head *bh_result,
 +                         u32 *bno, bool *new, bool *boundary,
                           int create)
  {
        int err = -EIO;
        /* Simplest case - block found, no allocation needed */
        if (!partial) {
                first_block = le32_to_cpu(chain[depth - 1].key);
 -              clear_buffer_new(bh_result); /* What's this do? */
                count++;
                /*map more blocks*/
                while (count < maxblocks && count <= blocks_to_boundary) {
                        mutex_unlock(&ei->truncate_mutex);
                        if (err)
                                goto cleanup;
 -                      clear_buffer_new(bh_result);
                        goto got_it;
                }
        }
        }
  
        if (IS_DAX(inode)) {
 +              int i;
 +
 +              /*
 +               * We must unmap blocks before zeroing so that writeback cannot
 +               * overwrite zeros with stale data from block device page cache.
 +               */
 +              for (i = 0; i < count; i++) {
 +                      unmap_underlying_metadata(inode->i_sb->s_bdev,
 +                                      le32_to_cpu(chain[depth-1].key) + i);
 +              }
                /*
                 * block must be initialised before we put it in the tree
                 * so that it's not found by another thread before it's
                        mutex_unlock(&ei->truncate_mutex);
                        goto cleanup;
                }
 -      } else
 -              set_buffer_new(bh_result);
 +      } else {
 +              *new = true;
 +      }
  
        ext2_splice_branch(inode, iblock, partial, indirect_blks, count);
        mutex_unlock(&ei->truncate_mutex);
  got_it:
 -      map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
 +      *bno = le32_to_cpu(chain[depth-1].key);
        if (count > blocks_to_boundary)
 -              set_buffer_boundary(bh_result);
 +              *boundary = true;
        err = count;
        /* Clean up and exit */
        partial = chain + depth - 1;    /* the whole chain */
@@@ -775,82 -765,19 +775,82 @@@ cleanup
        return err;
  }
  
 -int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create)
 +int ext2_get_block(struct inode *inode, sector_t iblock,
 +              struct buffer_head *bh_result, int create)
  {
        unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
 -      int ret = ext2_get_blocks(inode, iblock, max_blocks,
 -                            bh_result, create);
 -      if (ret > 0) {
 -              bh_result->b_size = (ret << inode->i_blkbits);
 -              ret = 0;
 +      bool new = false, boundary = false;
 +      u32 bno;
 +      int ret;
 +
 +      ret = ext2_get_blocks(inode, iblock, max_blocks, &bno, &new, &boundary,
 +                      create);
 +      if (ret <= 0)
 +              return ret;
 +
 +      map_bh(bh_result, inode->i_sb, bno);
 +      bh_result->b_size = (ret << inode->i_blkbits);
 +      if (new)
 +              set_buffer_new(bh_result);
 +      if (boundary)
 +              set_buffer_boundary(bh_result);
 +      return 0;
 +
 +}
 +
 +#ifdef CONFIG_FS_DAX
 +static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 +              unsigned flags, struct iomap *iomap)
 +{
 +      unsigned int blkbits = inode->i_blkbits;
 +      unsigned long first_block = offset >> blkbits;
 +      unsigned long max_blocks = (length + (1 << blkbits) - 1) >> blkbits;
 +      bool new = false, boundary = false;
 +      u32 bno;
 +      int ret;
 +
 +      ret = ext2_get_blocks(inode, first_block, max_blocks,
 +                      &bno, &new, &boundary, flags & IOMAP_WRITE);
 +      if (ret < 0)
 +              return ret;
 +
 +      iomap->flags = 0;
 +      iomap->bdev = inode->i_sb->s_bdev;
 +      iomap->offset = (u64)first_block << blkbits;
 +
 +      if (ret == 0) {
 +              iomap->type = IOMAP_HOLE;
 +              iomap->blkno = IOMAP_NULL_BLOCK;
 +              iomap->length = 1 << blkbits;
 +      } else {
 +              iomap->type = IOMAP_MAPPED;
 +              iomap->blkno = (sector_t)bno << (blkbits - 9);
 +              iomap->length = (u64)ret << blkbits;
 +              iomap->flags |= IOMAP_F_MERGED;
        }
 -      return ret;
  
 +      if (new)
 +              iomap->flags |= IOMAP_F_NEW;
 +      return 0;
 +}
 +
 +static int
 +ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length,
 +              ssize_t written, unsigned flags, struct iomap *iomap)
 +{
 +      if (iomap->type == IOMAP_MAPPED &&
 +          written < length &&
 +          (flags & IOMAP_WRITE))
 +              ext2_write_failed(inode->i_mapping, offset + length);
 +      return 0;
  }
  
 +struct iomap_ops ext2_iomap_ops = {
 +      .iomap_begin            = ext2_iomap_begin,
 +      .iomap_end              = ext2_iomap_end,
 +};
 +#endif /* CONFIG_FS_DAX */
 +
  int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                u64 start, u64 len)
  {
@@@ -936,10 -863,11 +936,10 @@@ ext2_direct_IO(struct kiocb *iocb, stru
        loff_t offset = iocb->ki_pos;
        ssize_t ret;
  
 -      if (IS_DAX(inode))
 -              ret = dax_do_io(iocb, inode, iter, ext2_get_block, NULL,
 -                              DIO_LOCKING);
 -      else
 -              ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 +      if (WARN_ON_ONCE(IS_DAX(inode)))
 +              return -EIO;
 +
 +      ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
        if (ret < 0 && iov_iter_rw(iter) == WRITE)
                ext2_write_failed(mapping, offset + count);
        return ret;
@@@ -1652,7 -1580,7 +1652,7 @@@ int ext2_setattr(struct dentry *dentry
        struct inode *inode = d_inode(dentry);
        int error;
  
-       error = inode_change_ok(inode, iattr);
+       error = setattr_prepare(dentry, iattr);
        if (error)
                return error;
  
diff --combined fs/ext4/inode.c
@@@ -647,19 -647,11 +647,19 @@@ found
                /*
                 * We have to zeroout blocks before inserting them into extent
                 * status tree. Otherwise someone could look them up there and
 -               * use them before they are really zeroed.
 +               * use them before they are really zeroed. We also have to
 +               * unmap metadata before zeroing as otherwise writeback can
 +               * overwrite zeros with stale data from block device.
                 */
                if (flags & EXT4_GET_BLOCKS_ZERO &&
                    map->m_flags & EXT4_MAP_MAPPED &&
                    map->m_flags & EXT4_MAP_NEW) {
 +                      ext4_lblk_t i;
 +
 +                      for (i = 0; i < map->m_len; i++) {
 +                              unmap_underlying_metadata(inode->i_sb->s_bdev,
 +                                                        map->m_pblk + i);
 +                      }
                        ret = ext4_issue_zeroout(inode, map->m_lblk,
                                                 map->m_pblk, map->m_len);
                        if (ret) {
@@@ -1657,8 -1649,6 +1657,8 @@@ static void mpage_release_unused_pages(
                        BUG_ON(!PageLocked(page));
                        BUG_ON(PageWriteback(page));
                        if (invalidate) {
 +                              if (page_mapped(page))
 +                                      clear_page_dirty_for_io(page);
                                block_invalidatepage(page, 0, PAGE_SIZE);
                                ClearPageUptodate(page);
                        }
  
  static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
  {
 -      int unlocked = 0;
 -      struct inode *inode = iocb->ki_filp->f_mapping->host;
 +      struct address_space *mapping = iocb->ki_filp->f_mapping;
 +      struct inode *inode = mapping->host;
        ssize_t ret;
  
 -      if (ext4_should_dioread_nolock(inode)) {
 -              /*
 -               * Nolock dioread optimization may be dynamically disabled
 -               * via ext4_inode_block_unlocked_dio(). Check inode's state
 -               * while holding extra i_dio_count ref.
 -               */
 -              inode_dio_begin(inode);
 -              smp_mb();
 -              if (unlikely(ext4_test_inode_state(inode,
 -                                                  EXT4_STATE_DIOREAD_LOCK)))
 -                      inode_dio_end(inode);
 -              else
 -                      unlocked = 1;
 -      }
 +      /*
 +       * Shared inode_lock is enough for us - it protects against concurrent
 +       * writes & truncates and since we take care of writing back page cache,
 +       * we are protected against page writeback as well.
 +       */
 +      inode_lock_shared(inode);
        if (IS_DAX(inode)) {
 -              ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block,
 -                              NULL, unlocked ? 0 : DIO_LOCKING);
 +              ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
        } else {
 +              size_t count = iov_iter_count(iter);
 +
 +              ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
 +                                                 iocb->ki_pos + count);
 +              if (ret)
 +                      goto out_unlock;
                ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
                                           iter, ext4_dio_get_block,
 -                                         NULL, NULL,
 -                                         unlocked ? 0 : DIO_LOCKING);
 +                                         NULL, NULL, 0);
        }
 -      if (unlocked)
 -              inode_dio_end(inode);
 +out_unlock:
 +      inode_unlock_shared(inode);
        return ret;
  }
  
@@@ -3896,7 -3890,7 +3896,7 @@@ int ext4_update_disksize_before_punch(s
  }
  
  /*
 - * ext4_punch_hole: punches a hole in a file by releaseing the blocks
 + * ext4_punch_hole: punches a hole in a file by releasing the blocks
   * associated with the given offset and length
   *
   * @inode:  File inode
@@@ -3925,7 -3919,7 +3925,7 @@@ int ext4_punch_hole(struct inode *inode
         * Write out all dirty pages to avoid race conditions
         * Then release them.
         */
 -      if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
 +      if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
                ret = filemap_write_and_wait_range(mapping, offset,
                                                   offset + length - 1);
                if (ret)
@@@ -4420,7 -4414,7 +4420,7 @@@ static inline void ext4_iget_extra_inod
  
  int ext4_get_projid(struct inode *inode, kprojid_t *projid)
  {
 -      if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, EXT4_FEATURE_RO_COMPAT_PROJECT))
 +      if (!ext4_has_feature_project(inode->i_sb))
                return -EOPNOTSUPP;
        *projid = EXT4_I(inode)->i_projid;
        return 0;
@@@ -4487,7 -4481,7 +4487,7 @@@ struct inode *ext4_iget(struct super_bl
        inode->i_mode = le16_to_cpu(raw_inode->i_mode);
        i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
        i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
 -      if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_PROJECT) &&
 +      if (ext4_has_feature_project(sb) &&
            EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
            EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
                i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
@@@ -4820,14 -4814,14 +4820,14 @@@ static int ext4_do_update_inode(handle_
   * Fix up interoperability with old kernels. Otherwise, old inodes get
   * re-used with the upper 16 bits of the uid/gid intact
   */
 -              if (!ei->i_dtime) {
 +              if (ei->i_dtime && list_empty(&ei->i_orphan)) {
 +                      raw_inode->i_uid_high = 0;
 +                      raw_inode->i_gid_high = 0;
 +              } else {
                        raw_inode->i_uid_high =
                                cpu_to_le16(high_16_bits(i_uid));
                        raw_inode->i_gid_high =
                                cpu_to_le16(high_16_bits(i_gid));
 -              } else {
 -                      raw_inode->i_uid_high = 0;
 -                      raw_inode->i_gid_high = 0;
                }
        } else {
                raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
                }
        }
  
 -      BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
 -                      EXT4_FEATURE_RO_COMPAT_PROJECT) &&
 +      BUG_ON(!ext4_has_feature_project(inode->i_sb) &&
               i_projid != EXT4_DEF_PROJID);
  
        if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
@@@ -5078,7 -5073,7 +5078,7 @@@ int ext4_setattr(struct dentry *dentry
        int orphan = 0;
        const unsigned int ia_valid = attr->ia_valid;
  
-       error = inode_change_ok(inode, attr);
+       error = setattr_prepare(dentry, attr);
        if (error)
                return error;
  
diff --combined fs/f2fs/acl.c
@@@ -109,16 -109,14 +109,16 @@@ fail
        return ERR_PTR(-EINVAL);
  }
  
 -static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size)
 +static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
 +                              const struct posix_acl *acl, size_t *size)
  {
        struct f2fs_acl_header *f2fs_acl;
        struct f2fs_acl_entry *entry;
        int i;
  
 -      f2fs_acl = f2fs_kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count *
 -                      sizeof(struct f2fs_acl_entry), GFP_NOFS);
 +      f2fs_acl = f2fs_kmalloc(sbi, sizeof(struct f2fs_acl_header) +
 +                      acl->a_count * sizeof(struct f2fs_acl_entry),
 +                      GFP_NOFS);
        if (!f2fs_acl)
                return ERR_PTR(-ENOMEM);
  
@@@ -177,7 -175,7 +177,7 @@@ static struct posix_acl *__f2fs_get_acl
  
        retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage);
        if (retval > 0) {
 -              value = f2fs_kmalloc(retval, GFP_F2FS_ZERO);
 +              value = f2fs_kmalloc(F2FS_I_SB(inode), retval, GFP_F2FS_ZERO);
                if (!value)
                        return ERR_PTR(-ENOMEM);
                retval = f2fs_getxattr(inode, name_index, "", value,
@@@ -212,12 -210,10 +212,10 @@@ static int __f2fs_set_acl(struct inode 
        case ACL_TYPE_ACCESS:
                name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
                if (acl) {
-                       error = posix_acl_equiv_mode(acl, &inode->i_mode);
-                       if (error < 0)
+                       error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+                       if (error)
                                return error;
                        set_acl_inode(inode, inode->i_mode);
-                       if (error == 0)
-                               acl = NULL;
                }
                break;
  
        }
  
        if (acl) {
 -              value = f2fs_acl_to_disk(acl, &size);
 +              value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size);
                if (IS_ERR(value)) {
                        clear_inode_flag(inode, FI_ACL_MODE);
                        return (int)PTR_ERR(value);
diff --combined fs/f2fs/file.c
@@@ -135,7 -135,7 +135,7 @@@ static inline bool need_do_checkpoint(s
  
        if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1)
                need_cp = true;
 -      else if (file_enc_name(inode) && need_dentry_mark(sbi, inode->i_ino))
 +      else if (is_sbi_flag_set(sbi, SBI_NEED_CP))
                need_cp = true;
        else if (file_wrong_pino(inode))
                need_cp = true;
@@@ -523,7 -523,7 +523,7 @@@ static int truncate_partial_data_page(s
                return 0;
  
        if (cache_only) {
 -              page = f2fs_grab_cache_page(mapping, index, false);
 +              page = find_lock_page(mapping, index);
                if (page && PageUptodate(page))
                        goto truncate_out;
                f2fs_put_page(page, 1);
@@@ -680,7 -680,7 +680,7 @@@ int f2fs_setattr(struct dentry *dentry
        struct inode *inode = d_inode(dentry);
        int err;
  
-       err = inode_change_ok(inode, attr);
+       err = setattr_prepare(dentry, attr);
        if (err)
                return err;
  
@@@ -1454,7 -1454,7 +1454,7 @@@ static int f2fs_ioc_setflags(struct fil
  {
        struct inode *inode = file_inode(filp);
        struct f2fs_inode_info *fi = F2FS_I(inode);
 -      unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE;
 +      unsigned int flags;
        unsigned int oldflags;
        int ret;
  
@@@ -1954,7 -1954,7 +1954,7 @@@ static int f2fs_defragment_range(struc
         * avoid defragment running in SSR mode when free section are allocated
         * intensively
         */
 -      if (has_not_enough_free_secs(sbi, sec_num)) {
 +      if (has_not_enough_free_secs(sbi, 0, sec_num)) {
                err = -EAGAIN;
                goto out;
        }
@@@ -2085,13 -2085,6 +2085,13 @@@ static int f2fs_move_file_range(struct 
        if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst))
                return -EOPNOTSUPP;
  
 +      if (src == dst) {
 +              if (pos_in == pos_out)
 +                      return 0;
 +              if (pos_out > pos_in && pos_out < pos_in + len)
 +                      return -EINVAL;
 +      }
 +
        inode_lock(src);
        if (src != dst) {
                if (!inode_trylock(dst)) {
  
        f2fs_balance_fs(sbi, true);
        f2fs_lock_op(sbi);
 -      ret = __exchange_data_block(src, dst, pos_in,
 -                              pos_out, len >> F2FS_BLKSIZE_BITS, false);
 +      ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
 +                              pos_out >> F2FS_BLKSIZE_BITS,
 +                              len >> F2FS_BLKSIZE_BITS, false);
  
        if (!ret) {
                if (dst_max_i_size)
diff --combined fs/f2fs/node.c
@@@ -54,6 -54,8 +54,6 @@@ bool available_free_memory(struct f2fs_
                res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
                if (excess_cached_nats(sbi))
                        res = false;
 -              if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD)
 -                      res = false;
        } else if (type == DIRTY_DENTS) {
                if (sbi->sb->s_bdi->wb.dirty_exceeded)
                        return false;
@@@ -1312,7 -1314,6 +1312,7 @@@ int fsync_node_pages(struct f2fs_sb_inf
        struct page *last_page = NULL;
        bool marked = false;
        nid_t ino = inode->i_ino;
 +      int nwritten = 0;
  
        if (atomic) {
                last_page = last_fsync_dnode(sbi, ino);
@@@ -1386,10 -1387,7 +1386,10 @@@ continue_unlock
                                unlock_page(page);
                                f2fs_put_page(last_page, 0);
                                break;
 +                      } else {
 +                              nwritten++;
                        }
 +
                        if (page == last_page) {
                                f2fs_put_page(page, 0);
                                marked = true;
                unlock_page(last_page);
                goto retry;
        }
 +
 +      if (nwritten)
 +              f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE);
        return ret ? -EIO: 0;
  }
  
@@@ -1423,7 -1418,6 +1423,7 @@@ int sync_node_pages(struct f2fs_sb_inf
        struct pagevec pvec;
        int step = 0;
        int nwritten = 0;
 +      int ret = 0;
  
        pagevec_init(&pvec, 0);
  
@@@ -1444,8 -1438,7 +1444,8 @@@ next_step
  
                        if (unlikely(f2fs_cp_error(sbi))) {
                                pagevec_release(&pvec);
 -                              return -EIO;
 +                              ret = -EIO;
 +                              goto out;
                        }
  
                        /*
@@@ -1496,8 -1489,6 +1496,8 @@@ continue_unlock
  
                        if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
                                unlock_page(page);
 +                      else
 +                              nwritten++;
  
                        if (--wbc->nr_to_write == 0)
                                break;
                step++;
                goto next_step;
        }
 -      return nwritten;
 +out:
 +      if (nwritten)
 +              f2fs_submit_merged_bio(sbi, NODE, WRITE);
 +      return ret;
  }
  
  int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
  {
        pgoff_t index = 0, end = ULONG_MAX;
        struct pagevec pvec;
-       int ret2 = 0, ret = 0;
+       int ret2, ret = 0;
  
        pagevec_init(&pvec, 0);
  
                cond_resched();
        }
  
-       if (unlikely(test_and_clear_bit(AS_ENOSPC, &NODE_MAPPING(sbi)->flags)))
-               ret2 = -ENOSPC;
-       if (unlikely(test_and_clear_bit(AS_EIO, &NODE_MAPPING(sbi)->flags)))
-               ret2 = -EIO;
+       ret2 = filemap_check_errors(NODE_MAPPING(sbi));
        if (!ret)
                ret = ret2;
        return ret;
@@@ -1684,9 -1669,6 +1681,9 @@@ const struct address_space_operations f
        .set_page_dirty = f2fs_set_node_page_dirty,
        .invalidatepage = f2fs_invalidate_page,
        .releasepage    = f2fs_release_page,
 +#ifdef CONFIG_MIGRATION
 +      .migratepage    = f2fs_migrate_page,
 +#endif
  };
  
  static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
@@@ -1853,7 -1835,7 +1850,7 @@@ bool alloc_nid(struct f2fs_sb_info *sbi
        struct free_nid *i = NULL;
  retry:
  #ifdef CONFIG_F2FS_FAULT_INJECTION
 -      if (time_to_inject(FAULT_ALLOC_NID))
 +      if (time_to_inject(sbi, FAULT_ALLOC_NID))
                return false;
  #endif
        if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
@@@ -2030,12 -2012,10 +2027,12 @@@ int recover_inode_page(struct f2fs_sb_i
  
        if (unlikely(old_ni.blk_addr != NULL_ADDR))
                return -EINVAL;
 -
 +retry:
        ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
 -      if (!ipage)
 -              return -ENOMEM;
 +      if (!ipage) {
 +              congestion_wait(BLK_RW_ASYNC, HZ/50);
 +              goto retry;
 +      }
  
        /* Should not use this inode from free nid list */
        remove_free_nid(NM_I(sbi), ino);
diff --combined fs/fuse/dir.c
@@@ -13,8 -13,6 +13,8 @@@
  #include <linux/sched.h>
  #include <linux/namei.h>
  #include <linux/slab.h>
 +#include <linux/xattr.h>
 +#include <linux/posix_acl.h>
  
  static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
  {
@@@ -39,39 -37,47 +39,39 @@@ static void fuse_advise_use_readdirplus
        set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
  }
  
 -#if BITS_PER_LONG >= 64
 +union fuse_dentry {
 +      u64 time;
 +      struct rcu_head rcu;
 +};
 +
  static inline void fuse_dentry_settime(struct dentry *entry, u64 time)
  {
 -      entry->d_time = time;
 +      ((union fuse_dentry *) entry->d_fsdata)->time = time;
  }
  
  static inline u64 fuse_dentry_time(struct dentry *entry)
  {
 -      return entry->d_time;
 -}
 -#else
 -/*
 - * On 32 bit archs store the high 32 bits of time in d_fsdata
 - */
 -static void fuse_dentry_settime(struct dentry *entry, u64 time)
 -{
 -      entry->d_time = time;
 -      entry->d_fsdata = (void *) (unsigned long) (time >> 32);
 -}
 -
 -static u64 fuse_dentry_time(struct dentry *entry)
 -{
 -      return (u64) entry->d_time +
 -              ((u64) (unsigned long) entry->d_fsdata << 32);
 +      return ((union fuse_dentry *) entry->d_fsdata)->time;
  }
 -#endif
  
  /*
   * FUSE caches dentries and attributes with separate timeout.  The
   * time in jiffies until the dentry/attributes are valid is stored in
 - * dentry->d_time and fuse_inode->i_time respectively.
 + * dentry->d_fsdata and fuse_inode->i_time respectively.
   */
  
  /*
   * Calculate the time in jiffies until a dentry/attributes are valid
   */
 -static u64 time_to_jiffies(unsigned long sec, unsigned long nsec)
 +static u64 time_to_jiffies(u64 sec, u32 nsec)
  {
        if (sec || nsec) {
 -              struct timespec ts = {sec, nsec};
 -              return get_jiffies_64() + timespec_to_jiffies(&ts);
 +              struct timespec64 ts = {
 +                      sec,
 +                      max_t(u32, nsec, NSEC_PER_SEC - 1)
 +              };
 +
 +              return get_jiffies_64() + timespec64_to_jiffies(&ts);
        } else
                return 0;
  }
@@@ -237,7 -243,6 +237,7 @@@ static int fuse_dentry_revalidate(struc
                if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
                        goto invalid;
  
 +              forget_all_cached_acls(inode);
                fuse_change_attributes(inode, &outarg.attr,
                                       entry_attr_timeout(&outarg),
                                       attr_version);
@@@ -267,23 -272,8 +267,23 @@@ static int invalid_nodeid(u64 nodeid
        return !nodeid || nodeid == FUSE_ROOT_ID;
  }
  
 +static int fuse_dentry_init(struct dentry *dentry)
 +{
 +      dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);
 +
 +      return dentry->d_fsdata ? 0 : -ENOMEM;
 +}
 +static void fuse_dentry_release(struct dentry *dentry)
 +{
 +      union fuse_dentry *fd = dentry->d_fsdata;
 +
 +      kfree_rcu(fd, rcu);
 +}
 +
  const struct dentry_operations fuse_dentry_operations = {
        .d_revalidate   = fuse_dentry_revalidate,
 +      .d_init         = fuse_dentry_init,
 +      .d_release      = fuse_dentry_release,
  };
  
  int fuse_valid_type(int m)
@@@ -644,7 -634,7 +644,7 @@@ static int fuse_symlink(struct inode *d
        return create_new_entry(fc, &args, dir, entry, S_IFLNK);
  }
  
 -static inline void fuse_update_ctime(struct inode *inode)
 +void fuse_update_ctime(struct inode *inode)
  {
        if (!IS_NOCMTIME(inode)) {
                inode->i_ctime = current_fs_time(inode->i_sb);
@@@ -927,7 -917,6 +927,7 @@@ int fuse_update_attributes(struct inod
  
        if (time_before64(fi->i_time, get_jiffies_64())) {
                r = true;
 +              forget_all_cached_acls(inode);
                err = fuse_do_getattr(inode, stat, file);
        } else {
                r = false;
@@@ -1028,7 -1017,7 +1028,7 @@@ int fuse_allow_current_process(struct f
  {
        const struct cred *cred;
  
 -      if (fc->flags & FUSE_ALLOW_OTHER)
 +      if (fc->allow_other)
                return 1;
  
        cred = current_cred();
@@@ -1075,7 -1064,6 +1075,7 @@@ static int fuse_perm_getattr(struct ino
        if (mask & MAY_NOT_BLOCK)
                return -ECHILD;
  
 +      forget_all_cached_acls(inode);
        return fuse_do_getattr(inode, NULL, NULL);
  }
  
@@@ -1104,7 -1092,7 +1104,7 @@@ static int fuse_permission(struct inod
        /*
         * If attributes are needed, refresh them before proceeding
         */
 -      if ((fc->flags & FUSE_DEFAULT_PERMISSIONS) ||
 +      if (fc->default_permissions ||
            ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
                struct fuse_inode *fi = get_fuse_inode(inode);
  
                }
        }
  
 -      if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
 +      if (fc->default_permissions) {
                err = generic_permission(inode, mask);
  
                /* If permission is denied, try to refresh file
@@@ -1245,7 -1233,6 +1245,7 @@@ retry
                fi->nlookup++;
                spin_unlock(&fc->lock);
  
 +              forget_all_cached_acls(inode);
                fuse_change_attributes(inode, &o->attr,
                                       entry_attr_timeout(o),
                                       attr_version);
@@@ -1604,9 -1591,10 +1604,10 @@@ int fuse_flush_times(struct inode *inod
   * vmtruncate() doesn't allow for this case, so do the rlimit checking
   * and the actual truncation by hand.
   */
- int fuse_do_setattr(struct inode *inode, struct iattr *attr,
+ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
                    struct file *file)
  {
+       struct inode *inode = d_inode(dentry);
        struct fuse_conn *fc = get_fuse_conn(inode);
        struct fuse_inode *fi = get_fuse_inode(inode);
        FUSE_ARGS(args);
        int err;
        bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
  
 -      if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
 +      if (!fc->default_permissions)
                attr->ia_valid |= ATTR_FORCE;
  
-       err = inode_change_ok(inode, attr);
+       err = setattr_prepare(dentry, attr);
        if (err)
                return err;
  
@@@ -1713,77 -1701,174 +1714,77 @@@ error
  }
  
  static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 -{
 -      struct inode *inode = d_inode(entry);
 -
 -      if (!fuse_allow_current_process(get_fuse_conn(inode)))
 -              return -EACCES;
 -
 -      if (attr->ia_valid & ATTR_FILE)
 -              return fuse_do_setattr(entry, attr, attr->ia_file);
 -      else
 -              return fuse_do_setattr(entry, attr, NULL);
 -}
 -
 -static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
 -                      struct kstat *stat)
  {
        struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
 +      struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
 +      int ret;
  
 -      if (!fuse_allow_current_process(fc))
 +      if (!fuse_allow_current_process(get_fuse_conn(inode)))
                return -EACCES;
  
 -      return fuse_update_attributes(inode, stat, NULL, NULL);
 -}
 -
 -static int fuse_setxattr(struct dentry *unused, struct inode *inode,
 -                       const char *name, const void *value,
 -                       size_t size, int flags)
 -{
 -      struct fuse_conn *fc = get_fuse_conn(inode);
 -      FUSE_ARGS(args);
 -      struct fuse_setxattr_in inarg;
 -      int err;
 -
 -      if (fc->no_setxattr)
 -              return -EOPNOTSUPP;
 +      if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
 +              attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
 +                                  ATTR_MODE);
  
 -      memset(&inarg, 0, sizeof(inarg));
 -      inarg.size = size;
 -      inarg.flags = flags;
 -      args.in.h.opcode = FUSE_SETXATTR;
 -      args.in.h.nodeid = get_node_id(inode);
 -      args.in.numargs = 3;
 -      args.in.args[0].size = sizeof(inarg);
 -      args.in.args[0].value = &inarg;
 -      args.in.args[1].size = strlen(name) + 1;
 -      args.in.args[1].value = name;
 -      args.in.args[2].size = size;
 -      args.in.args[2].value = value;
 -      err = fuse_simple_request(fc, &args);
 -      if (err == -ENOSYS) {
 -              fc->no_setxattr = 1;
 -              err = -EOPNOTSUPP;
 -      }
 -      if (!err) {
 -              fuse_invalidate_attr(inode);
 -              fuse_update_ctime(inode);
 +              /*
 +               * The only sane way to reliably kill suid/sgid is to do it in
 +               * the userspace filesystem
 +               *
 +               * This should be done on write(), truncate() and chown().
 +               */
 +              if (!fc->handle_killpriv) {
 +                      int kill;
 +
 +                      /*
 +                       * ia_mode calculation may have used stale i_mode.
 +                       * Refresh and recalculate.
 +                       */
 +                      ret = fuse_do_getattr(inode, NULL, file);
 +                      if (ret)
 +                              return ret;
 +
 +                      attr->ia_mode = inode->i_mode;
 +                      kill = should_remove_suid(entry);
 +                      if (kill & ATTR_KILL_SUID) {
 +                              attr->ia_valid |= ATTR_MODE;
 +                              attr->ia_mode &= ~S_ISUID;
 +                      }
 +                      if (kill & ATTR_KILL_SGID) {
 +                              attr->ia_valid |= ATTR_MODE;
 +                              attr->ia_mode &= ~S_ISGID;
 +                      }
 +              }
        }
 -      return err;
 -}
 -
 -static ssize_t fuse_getxattr(struct dentry *entry, struct inode *inode,
 -                           const char *name, void *value, size_t size)
 -{
 -      struct fuse_conn *fc = get_fuse_conn(inode);
 -      FUSE_ARGS(args);
 -      struct fuse_getxattr_in inarg;
 -      struct fuse_getxattr_out outarg;
 -      ssize_t ret;
 +      if (!attr->ia_valid)
 +              return 0;
  
-       ret = fuse_do_setattr(inode, attr, file);
 -      if (fc->no_getxattr)
 -              return -EOPNOTSUPP;
++      ret = fuse_do_setattr(entry, attr, file);
 +      if (!ret) {
 +              /*
 +               * If filesystem supports acls it may have updated acl xattrs in
 +               * the filesystem, so forget cached acls for the inode.
 +               */
 +              if (fc->posix_acl)
 +                      forget_all_cached_acls(inode);
  
 -      memset(&inarg, 0, sizeof(inarg));
 -      inarg.size = size;
 -      args.in.h.opcode = FUSE_GETXATTR;
 -      args.in.h.nodeid = get_node_id(inode);
 -      args.in.numargs = 2;
 -      args.in.args[0].size = sizeof(inarg);
 -      args.in.args[0].value = &inarg;
 -      args.in.args[1].size = strlen(name) + 1;
 -      args.in.args[1].value = name;
 -      /* This is really two different operations rolled into one */
 -      args.out.numargs = 1;
 -      if (size) {
 -              args.out.argvar = 1;
 -              args.out.args[0].size = size;
 -              args.out.args[0].value = value;
 -      } else {
 -              args.out.args[0].size = sizeof(outarg);
 -              args.out.args[0].value = &outarg;
 -      }
 -      ret = fuse_simple_request(fc, &args);
 -      if (!ret && !size)
 -              ret = outarg.size;
 -      if (ret == -ENOSYS) {
 -              fc->no_getxattr = 1;
 -              ret = -EOPNOTSUPP;
 +              /* Directory mode changed, may need to revalidate access */
 +              if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
 +                      fuse_invalidate_entry_cache(entry);
        }
        return ret;
  }
  
 -static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 +static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
 +                      struct kstat *stat)
  {
        struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
 -      FUSE_ARGS(args);
 -      struct fuse_getxattr_in inarg;
 -      struct fuse_getxattr_out outarg;
 -      ssize_t ret;
  
        if (!fuse_allow_current_process(fc))
                return -EACCES;
  
 -      if (fc->no_listxattr)
 -              return -EOPNOTSUPP;
 -
 -      memset(&inarg, 0, sizeof(inarg));
 -      inarg.size = size;
 -      args.in.h.opcode = FUSE_LISTXATTR;
 -      args.in.h.nodeid = get_node_id(inode);
 -      args.in.numargs = 1;
 -      args.in.args[0].size = sizeof(inarg);
 -      args.in.args[0].value = &inarg;
 -      /* This is really two different operations rolled into one */
 -      args.out.numargs = 1;
 -      if (size) {
 -              args.out.argvar = 1;
 -              args.out.args[0].size = size;
 -              args.out.args[0].value = list;
 -      } else {
 -              args.out.args[0].size = sizeof(outarg);
 -              args.out.args[0].value = &outarg;
 -      }
 -      ret = fuse_simple_request(fc, &args);
 -      if (!ret && !size)
 -              ret = outarg.size;
 -      if (ret == -ENOSYS) {
 -              fc->no_listxattr = 1;
 -              ret = -EOPNOTSUPP;
 -      }
 -      return ret;
 -}
 -
 -static int fuse_removexattr(struct dentry *entry, const char *name)
 -{
 -      struct inode *inode = d_inode(entry);
 -      struct fuse_conn *fc = get_fuse_conn(inode);
 -      FUSE_ARGS(args);
 -      int err;
 -
 -      if (fc->no_removexattr)
 -              return -EOPNOTSUPP;
 -
 -      args.in.h.opcode = FUSE_REMOVEXATTR;
 -      args.in.h.nodeid = get_node_id(inode);
 -      args.in.numargs = 1;
 -      args.in.args[0].size = strlen(name) + 1;
 -      args.in.args[0].value = name;
 -      err = fuse_simple_request(fc, &args);
 -      if (err == -ENOSYS) {
 -              fc->no_removexattr = 1;
 -              err = -EOPNOTSUPP;
 -      }
 -      if (!err) {
 -              fuse_invalidate_attr(inode);
 -              fuse_update_ctime(inode);
 -      }
 -      return err;
 +      return fuse_update_attributes(inode, stat, NULL, NULL);
  }
  
  static const struct inode_operations fuse_dir_inode_operations = {
        .mknod          = fuse_mknod,
        .permission     = fuse_permission,
        .getattr        = fuse_getattr,
 -      .setxattr       = fuse_setxattr,
 -      .getxattr       = fuse_getxattr,
 +      .setxattr       = generic_setxattr,
 +      .getxattr       = generic_getxattr,
        .listxattr      = fuse_listxattr,
 -      .removexattr    = fuse_removexattr,
 +      .removexattr    = generic_removexattr,
 +      .get_acl        = fuse_get_acl,
 +      .set_acl        = fuse_set_acl,
  };
  
  static const struct file_operations fuse_dir_operations = {
@@@ -1823,12 -1906,10 +1824,12 @@@ static const struct inode_operations fu
        .setattr        = fuse_setattr,
        .permission     = fuse_permission,
        .getattr        = fuse_getattr,
 -      .setxattr       = fuse_setxattr,
 -      .getxattr       = fuse_getxattr,
 +      .setxattr       = generic_setxattr,
 +      .getxattr       = generic_getxattr,
        .listxattr      = fuse_listxattr,
 -      .removexattr    = fuse_removexattr,
 +      .removexattr    = generic_removexattr,
 +      .get_acl        = fuse_get_acl,
 +      .set_acl        = fuse_set_acl,
  };
  
  static const struct inode_operations fuse_symlink_inode_operations = {
        .get_link       = fuse_get_link,
        .readlink       = generic_readlink,
        .getattr        = fuse_getattr,
 -      .setxattr       = fuse_setxattr,
 -      .getxattr       = fuse_getxattr,
 +      .setxattr       = generic_setxattr,
 +      .getxattr       = generic_getxattr,
        .listxattr      = fuse_listxattr,
 -      .removexattr    = fuse_removexattr,
 +      .removexattr    = generic_removexattr,
  };
  
  void fuse_init_common(struct inode *inode)
diff --combined fs/fuse/file.c
@@@ -2326,6 -2326,49 +2326,6 @@@ static loff_t fuse_file_llseek(struct f
        return retval;
  }
  
 -static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
 -                      unsigned int nr_segs, size_t bytes, bool to_user)
 -{
 -      struct iov_iter ii;
 -      int page_idx = 0;
 -
 -      if (!bytes)
 -              return 0;
 -
 -      iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
 -
 -      while (iov_iter_count(&ii)) {
 -              struct page *page = pages[page_idx++];
 -              size_t todo = min_t(size_t, PAGE_SIZE, iov_iter_count(&ii));
 -              void *kaddr;
 -
 -              kaddr = kmap(page);
 -
 -              while (todo) {
 -                      char __user *uaddr = ii.iov->iov_base + ii.iov_offset;
 -                      size_t iov_len = ii.iov->iov_len - ii.iov_offset;
 -                      size_t copy = min(todo, iov_len);
 -                      size_t left;
 -
 -                      if (!to_user)
 -                              left = copy_from_user(kaddr, uaddr, copy);
 -                      else
 -                              left = copy_to_user(uaddr, kaddr, copy);
 -
 -                      if (unlikely(left))
 -                              return -EFAULT;
 -
 -                      iov_iter_advance(&ii, copy);
 -                      todo -= copy;
 -                      kaddr += copy;
 -              }
 -
 -              kunmap(page);
 -      }
 -
 -      return 0;
 -}
 -
  /*
   * CUSE servers compiled on 32bit broke on 64bit kernels because the
   * ABI was defined to be 'struct iovec' which is different on 32bit
@@@ -2477,9 -2520,8 +2477,9 @@@ long fuse_do_ioctl(struct file *file, u
        struct iovec *iov_page = NULL;
        struct iovec *in_iov = NULL, *out_iov = NULL;
        unsigned int in_iovs = 0, out_iovs = 0, num_pages = 0, max_pages;
 -      size_t in_size, out_size, transferred;
 -      int err;
 +      size_t in_size, out_size, transferred, c;
 +      int err, i;
 +      struct iov_iter ii;
  
  #if BITS_PER_LONG == 32
        inarg.flags |= FUSE_IOCTL_32BIT;
                req->in.args[1].size = in_size;
                req->in.argpages = 1;
  
 -              err = fuse_ioctl_copy_user(pages, in_iov, in_iovs, in_size,
 -                                         false);
 -              if (err)
 -                      goto out;
 +              err = -EFAULT;
 +              iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
 +              for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
 +                      c = copy_page_from_iter(pages[i], 0, PAGE_SIZE, &ii);
 +                      if (c != PAGE_SIZE && iov_iter_count(&ii))
 +                              goto out;
 +              }
        }
  
        req->out.numargs = 2;
        if (transferred > inarg.out_size)
                goto out;
  
 -      err = fuse_ioctl_copy_user(pages, out_iov, out_iovs, transferred, true);
 +      err = -EFAULT;
 +      iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
 +      for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= num_pages); i++) {
 +              c = copy_page_to_iter(pages[i], 0, PAGE_SIZE, &ii);
 +              if (c != PAGE_SIZE && iov_iter_count(&ii))
 +                      goto out;
 +      }
 +      err = 0;
   out:
        if (req)
                fuse_put_request(fc, req);
@@@ -2810,7 -2842,7 +2810,7 @@@ static void fuse_do_truncate(struct fil
        attr.ia_file = file;
        attr.ia_valid |= ATTR_FILE;
  
-       fuse_do_setattr(inode, &attr, file);
+       fuse_do_setattr(file_dentry(file), &attr, file);
  }
  
  static inline loff_t fuse_round_up(loff_t off)
diff --combined fs/fuse/fuse_i.h
@@@ -23,7 -23,6 +23,7 @@@
  #include <linux/poll.h>
  #include <linux/workqueue.h>
  #include <linux/kref.h>
 +#include <linux/xattr.h>
  
  /** Max number of pages that can be used in a single read request */
  #define FUSE_MAX_PAGES_PER_REQ 32
  /** Number of dentries for each connection in the control filesystem */
  #define FUSE_CTL_NUM_DENTRIES 5
  
 -/** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
 -    module will check permissions based on the file mode.  Otherwise no
 -    permission checking is done in the kernel */
 -#define FUSE_DEFAULT_PERMISSIONS (1 << 0)
 -
 -/** If the FUSE_ALLOW_OTHER flag is given, then not only the user
 -    doing the mount will be allowed to access the filesystem */
 -#define FUSE_ALLOW_OTHER         (1 << 1)
 -
  /** Number of page pointers embedded in fuse_req */
  #define FUSE_REQ_INLINE_PAGES 1
  
@@@ -461,6 -469,9 +461,6 @@@ struct fuse_conn 
        /** The group id for this mount */
        kgid_t group_id;
  
 -      /** The fuse mount flags for this mount */
 -      unsigned flags;
 -
        /** Maximum read size */
        unsigned max_read;
  
        /** allow parallel lookups and readdir (default is serialized) */
        unsigned parallel_dirops:1;
  
 +      /** fs handles killing suid/sgid/cap on write/chown/trunc */
 +      unsigned handle_killpriv:1;
 +
        /*
         * The following bitfields are only for optimization purposes
         * and hence races in setting them will not cause malfunction
        /** Is lseek not implemented by fs? */
        unsigned no_lseek:1;
  
 +      /** Does the filesystem support posix acls? */
 +      unsigned posix_acl:1;
 +
 +      /** Check permissions based on the file mode or not? */
 +      unsigned default_permissions:1;
 +
 +      /** Allow other than the mounter user to access the filesystem ? */
 +      unsigned allow_other:1;
 +
        /** The number of requests waiting for completion */
        atomic_t num_waiting;
  
@@@ -903,8 -902,6 +903,8 @@@ int fuse_allow_current_process(struct f
  
  u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
  
 +void fuse_update_ctime(struct inode *inode);
 +
  int fuse_update_attributes(struct inode *inode, struct kstat *stat,
                           struct file *file, bool *refreshed);
  
@@@ -961,7 -958,7 +961,7 @@@ bool fuse_write_update_size(struct inod
  int fuse_flush_times(struct inode *inode, struct fuse_file *ff);
  int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
  
- int fuse_do_setattr(struct inode *inode, struct iattr *attr,
+ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
                    struct file *file);
  
  void fuse_set_initialized(struct fuse_conn *fc);
  void fuse_unlock_inode(struct inode *inode);
  void fuse_lock_inode(struct inode *inode);
  
 +int fuse_setxattr(struct inode *inode, const char *name, const void *value,
 +                size_t size, int flags);
 +ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
 +                    size_t size);
 +ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
 +int fuse_removexattr(struct inode *inode, const char *name);
 +extern const struct xattr_handler *fuse_xattr_handlers[];
 +extern const struct xattr_handler *fuse_acl_xattr_handlers[];
 +
 +struct posix_acl;
 +struct posix_acl *fuse_get_acl(struct inode *inode, int type);
 +int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 +
  #endif /* _FS_FUSE_I_H */
diff --combined fs/gfs2/inode.c
@@@ -187,10 -187,6 +187,10 @@@ struct inode *gfs2_inode_lookup(struct 
                }
  
                gfs2_set_iop(inode);
 +
 +              inode->i_atime.tv_sec = 0;
 +              inode->i_atime.tv_nsec = 0;
 +
                unlock_new_inode(inode);
        }
  
@@@ -1936,7 -1932,7 +1936,7 @@@ static int gfs2_setattr(struct dentry *
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto out;
  
-       error = inode_change_ok(inode, attr);
+       error = setattr_prepare(dentry, attr);
        if (error)
                goto out;
  
diff --combined fs/hugetlbfs/inode.c
@@@ -416,6 -416,7 +416,6 @@@ static void remove_inode_hugepages(stru
  
                for (i = 0; i < pagevec_count(&pvec); ++i) {
                        struct page *page = pvec.pages[i];
 -                      bool rsv_on_error;
                        u32 hash;
  
                        /*
                         * cache (remove_huge_page) BEFORE removing the
                         * region/reserve map (hugetlb_unreserve_pages).  In
                         * rare out of memory conditions, removal of the
 -                       * region/reserve map could fail.  Before free'ing
 -                       * the page, note PagePrivate which is used in case
 -                       * of error.
 +                       * region/reserve map could fail. In that case, the
 +                       * subpool and global reserve usage counts may need
 +                       * to be adjusted.
                         */
 -                      rsv_on_error = !PagePrivate(page);
 +                      VM_BUG_ON(PagePrivate(page));
                        remove_huge_page(page);
                        freed++;
                        if (!truncate_op) {
                                if (unlikely(hugetlb_unreserve_pages(inode,
                                                        next, next + 1, 1)))
 -                                      hugetlb_fix_reserve_counts(inode,
 -                                                              rsv_on_error);
 +                                      hugetlb_fix_reserve_counts(inode);
                        }
  
                        unlock_page(page);
@@@ -670,7 -672,7 +670,7 @@@ static int hugetlbfs_setattr(struct den
  
        BUG_ON(!inode);
  
-       error = inode_change_ok(inode, attr);
+       error = setattr_prepare(dentry, attr);
        if (error)
                return error;
  
diff --combined fs/internal.h
@@@ -12,7 -12,6 +12,7 @@@
  struct super_block;
  struct file_system_type;
  struct iomap;
 +struct iomap_ops;
  struct linux_binprm;
  struct path;
  struct mount;
@@@ -121,6 -120,15 +121,15 @@@ extern long prune_icache_sb(struct supe
  extern void inode_add_lru(struct inode *inode);
  extern int dentry_needs_remove_privs(struct dentry *dentry);
  
+ extern bool __atime_needs_update(const struct path *, struct inode *, bool);
+ static inline bool atime_needs_update_rcu(const struct path *path,
+                                         struct inode *inode)
+ {
+       return __atime_needs_update(path, inode, true);
+ }
+ extern bool atime_needs_update_rcu(const struct path *, struct inode *);
  /*
   * fs-writeback.c
   */
@@@ -157,7 -165,7 +166,7 @@@ extern void mnt_pin_kill(struct mount *
  /*
   * fs/nsfs.c
   */
- extern struct dentry_operations ns_dentry_operations;
+ extern const struct dentry_operations ns_dentry_operations;
  
  /*
   * fs/ioctl.c
  extern int do_vfs_ioctl(struct file *file, unsigned int fd, unsigned int cmd,
                    unsigned long arg);
  extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 +
 +/*
 + * iomap support:
 + */
 +typedef loff_t (*iomap_actor_t)(struct inode *inode, loff_t pos, loff_t len,
 +              void *data, struct iomap *iomap);
 +
 +loff_t iomap_apply(struct inode *inode, loff_t pos, loff_t length,
 +              unsigned flags, struct iomap_ops *ops, void *data,
 +              iomap_actor_t actor);
diff --combined fs/locks.c
  #include <linux/pid_namespace.h>
  #include <linux/hashtable.h>
  #include <linux/percpu.h>
 -#include <linux/lglock.h>
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/filelock.h>
  #define IS_LEASE(fl)  (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
  #define IS_OFDLCK(fl) (fl->fl_flags & FL_OFDLCK)
  
+ static inline bool is_remote_lock(struct file *filp)
+ {
+       return likely(!(filp->f_path.dentry->d_sb->s_flags & MS_NOREMOTELOCK));
+ }
  static bool lease_breaking(struct file_lock *fl)
  {
        return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
@@@ -157,18 -163,12 +162,18 @@@ int lease_break_time = 45
  
  /*
   * The global file_lock_list is only used for displaying /proc/locks, so we
 - * keep a list on each CPU, with each list protected by its own spinlock via
 - * the file_lock_lglock. Note that alterations to the list also require that
 - * the relevant flc_lock is held.
 + * keep a list on each CPU, with each list protected by its own spinlock.
 + * Global serialization is done using file_rwsem.
 + *
 + * Note that alterations to the list also require that the relevant flc_lock is
 + * held.
   */
 -DEFINE_STATIC_LGLOCK(file_lock_lglock);
 -static DEFINE_PER_CPU(struct hlist_head, file_lock_list);
 +struct file_lock_list_struct {
 +      spinlock_t              lock;
 +      struct hlist_head       hlist;
 +};
 +static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
 +DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
  
  /*
   * The blocked_hash is used to find POSIX lock loops for deadlock detection.
@@@ -592,23 -592,15 +597,23 @@@ static int posix_same_owner(struct file
  /* Must be called with the flc_lock held! */
  static void locks_insert_global_locks(struct file_lock *fl)
  {
 -      lg_local_lock(&file_lock_lglock);
 +      struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
 +
 +      percpu_rwsem_assert_held(&file_rwsem);
 +
 +      spin_lock(&fll->lock);
        fl->fl_link_cpu = smp_processor_id();
 -      hlist_add_head(&fl->fl_link, this_cpu_ptr(&file_lock_list));
 -      lg_local_unlock(&file_lock_lglock);
 +      hlist_add_head(&fl->fl_link, &fll->hlist);
 +      spin_unlock(&fll->lock);
  }
  
  /* Must be called with the flc_lock held! */
  static void locks_delete_global_locks(struct file_lock *fl)
  {
 +      struct file_lock_list_struct *fll;
 +
 +      percpu_rwsem_assert_held(&file_rwsem);
 +
        /*
         * Avoid taking lock if already unhashed. This is safe since this check
         * is done while holding the flc_lock, and new insertions into the list
         */
        if (hlist_unhashed(&fl->fl_link))
                return;
 -      lg_local_lock_cpu(&file_lock_lglock, fl->fl_link_cpu);
 +
 +      fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
 +      spin_lock(&fll->lock);
        hlist_del_init(&fl->fl_link);
 -      lg_local_unlock_cpu(&file_lock_lglock, fl->fl_link_cpu);
 +      spin_unlock(&fll->lock);
  }
  
  static unsigned long
@@@ -806,7 -796,7 +811,7 @@@ posix_test_lock(struct file *filp, stru
  {
        struct file_lock *cfl;
        struct file_lock_context *ctx;
-       struct inode *inode = file_inode(filp);
+       struct inode *inode = locks_inode(filp);
  
        ctx = smp_load_acquire(&inode->i_flctx);
        if (!ctx || list_empty_careful(&ctx->flc_posix)) {
@@@ -930,7 -920,6 +935,7 @@@ static int flock_lock_inode(struct inod
                        return -ENOMEM;
        }
  
 +      percpu_down_read_preempt_disable(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        if (request->fl_flags & FL_ACCESS)
                goto find_conflict;
@@@ -971,7 -960,6 +976,7 @@@ find_conflict
  
  out:
        spin_unlock(&ctx->flc_lock);
 +      percpu_up_read_preempt_enable(&file_rwsem);
        if (new_fl)
                locks_free_lock(new_fl);
        locks_dispose_list(&dispose);
@@@ -1008,7 -996,6 +1013,7 @@@ static int posix_lock_inode(struct inod
                new_fl2 = locks_alloc_lock();
        }
  
 +      percpu_down_read_preempt_disable(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        /*
         * New lock request. Walk all POSIX locks and look for conflicts. If
        }
   out:
        spin_unlock(&ctx->flc_lock);
 +      percpu_up_read_preempt_enable(&file_rwsem);
        /*
         * Free any unused locks.
         */
  int posix_lock_file(struct file *filp, struct file_lock *fl,
                        struct file_lock *conflock)
  {
-       return posix_lock_inode(file_inode(filp), fl, conflock);
+       return posix_lock_inode(locks_inode(filp), fl, conflock);
  }
  EXPORT_SYMBOL(posix_lock_file);
  
@@@ -1251,7 -1237,7 +1256,7 @@@ static int posix_lock_inode_wait(struc
  int locks_mandatory_locked(struct file *file)
  {
        int ret;
-       struct inode *inode = file_inode(file);
+       struct inode *inode = locks_inode(file);
        struct file_lock_context *ctx;
        struct file_lock *fl;
  
@@@ -1455,7 -1441,6 +1460,7 @@@ int __break_lease(struct inode *inode, 
                return error;
        }
  
 +      percpu_down_read_preempt_disable(&file_rwsem);
        spin_lock(&ctx->flc_lock);
  
        time_out_leases(inode, &dispose);
@@@ -1507,13 -1492,9 +1512,13 @@@ restart
        locks_insert_block(fl, new_fl);
        trace_break_lease_block(inode, new_fl);
        spin_unlock(&ctx->flc_lock);
 +      percpu_up_read_preempt_enable(&file_rwsem);
 +
        locks_dispose_list(&dispose);
        error = wait_event_interruptible_timeout(new_fl->fl_wait,
                                                !new_fl->fl_next, break_time);
 +
 +      percpu_down_read_preempt_disable(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        trace_break_lease_unblock(inode, new_fl);
        locks_delete_block(new_fl);
        }
  out:
        spin_unlock(&ctx->flc_lock);
 +      percpu_up_read_preempt_enable(&file_rwsem);
        locks_dispose_list(&dispose);
        locks_free_lock(new_fl);
        return error;
@@@ -1597,7 -1577,7 +1602,7 @@@ EXPORT_SYMBOL(lease_get_mtime)
  int fcntl_getlease(struct file *filp)
  {
        struct file_lock *fl;
-       struct inode *inode = file_inode(filp);
+       struct inode *inode = locks_inode(filp);
        struct file_lock_context *ctx;
        int type = F_UNLCK;
        LIST_HEAD(dispose);
        ctx = smp_load_acquire(&inode->i_flctx);
        if (ctx && !list_empty_careful(&ctx->flc_lease)) {
                spin_lock(&ctx->flc_lock);
-               time_out_leases(file_inode(filp), &dispose);
+               time_out_leases(inode, &dispose);
                list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
                        if (fl->fl_file != filp)
                                continue;
@@@ -1638,7 -1618,8 +1643,8 @@@ check_conflicting_open(const struct den
        if (flags & FL_LAYOUT)
                return 0;
  
-       if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
+       if ((arg == F_RDLCK) &&
+           (atomic_read(&d_real_inode(dentry)->i_writecount) > 0))
                return -EAGAIN;
  
        if ((arg == F_WRLCK) && ((d_count(dentry) > 1) ||
@@@ -1653,7 -1634,7 +1659,7 @@@ generic_add_lease(struct file *filp, lo
  {
        struct file_lock *fl, *my_fl = NULL, *lease;
        struct dentry *dentry = filp->f_path.dentry;
-       struct inode *inode = file_inode(filp);
+       struct inode *inode = dentry->d_inode;
        struct file_lock_context *ctx;
        bool is_deleg = (*flp)->fl_flags & FL_DELEG;
        int error;
                return -EINVAL;
        }
  
 +      percpu_down_read_preempt_disable(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        time_out_leases(inode, &dispose);
        error = check_conflicting_open(dentry, arg, lease->fl_flags);
@@@ -1756,7 -1736,6 +1762,7 @@@ out_setup
                lease->fl_lmops->lm_setup(lease, priv);
  out:
        spin_unlock(&ctx->flc_lock);
 +      percpu_up_read_preempt_enable(&file_rwsem);
        locks_dispose_list(&dispose);
        if (is_deleg)
                inode_unlock(inode);
@@@ -1769,7 -1748,7 +1775,7 @@@ static int generic_delete_lease(struct 
  {
        int error = -EAGAIN;
        struct file_lock *fl, *victim = NULL;
-       struct inode *inode = file_inode(filp);
+       struct inode *inode = locks_inode(filp);
        struct file_lock_context *ctx;
        LIST_HEAD(dispose);
  
                return error;
        }
  
 +      percpu_down_read_preempt_disable(&file_rwsem);
        spin_lock(&ctx->flc_lock);
        list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
                if (fl->fl_file == filp &&
        if (victim)
                error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
        spin_unlock(&ctx->flc_lock);
 +      percpu_up_read_preempt_enable(&file_rwsem);
        locks_dispose_list(&dispose);
        return error;
  }
  int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
                        void **priv)
  {
-       struct inode *inode = file_inode(filp);
+       struct inode *inode = locks_inode(filp);
        int error;
  
        if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
@@@ -1859,7 -1836,7 +1865,7 @@@ EXPORT_SYMBOL(generic_setlease)
  int
  vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
  {
-       if (filp->f_op->setlease)
+       if (filp->f_op->setlease && is_remote_lock(filp))
                return filp->f_op->setlease(filp, arg, lease, priv);
        else
                return generic_setlease(filp, arg, lease, priv);
@@@ -2008,7 -1985,7 +2014,7 @@@ SYSCALL_DEFINE2(flock, unsigned int, fd
        if (error)
                goto out_free;
  
-       if (f.file->f_op->flock)
+       if (f.file->f_op->flock && is_remote_lock(f.file))
                error = f.file->f_op->flock(f.file,
                                          (can_sleep) ? F_SETLKW : F_SETLK,
                                          lock);
   */
  int vfs_test_lock(struct file *filp, struct file_lock *fl)
  {
-       if (filp->f_op->lock)
+       if (filp->f_op->lock && is_remote_lock(filp))
                return filp->f_op->lock(filp, F_GETLK, fl);
        posix_test_lock(filp, fl);
        return 0;
@@@ -2158,7 -2135,7 +2164,7 @@@ out
   */
  int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
  {
-       if (filp->f_op->lock)
+       if (filp->f_op->lock && is_remote_lock(filp))
                return filp->f_op->lock(filp, cmd, fl);
        else
                return posix_lock_file(filp, fl, conf);
@@@ -2220,7 -2197,7 +2226,7 @@@ int fcntl_setlk(unsigned int fd, struc
        if (file_lock == NULL)
                return -ENOLCK;
  
-       inode = file_inode(filp);
+       inode = locks_inode(filp);
  
        /*
         * This might block, so we do it before checking the inode.
@@@ -2372,7 -2349,7 +2378,7 @@@ int fcntl_setlk64(unsigned int fd, stru
        if (copy_from_user(&flock, l, sizeof(flock)))
                goto out;
  
-       inode = file_inode(filp);
+       inode = locks_inode(filp);
  
        /* Don't allow mandatory locks on files that may be memory mapped
         * and shared.
@@@ -2455,6 -2432,7 +2461,7 @@@ out
  void locks_remove_posix(struct file *filp, fl_owner_t owner)
  {
        int error;
+       struct inode *inode = locks_inode(filp);
        struct file_lock lock;
        struct file_lock_context *ctx;
  
         * posix_lock_file().  Another process could be setting a lock on this
         * file at the same time, but we wouldn't remove that lock anyway.
         */
-       ctx =  smp_load_acquire(&file_inode(filp)->i_flctx);
+       ctx =  smp_load_acquire(&inode->i_flctx);
        if (!ctx || list_empty(&ctx->flc_posix))
                return;
  
  
        if (lock.fl_ops && lock.fl_ops->fl_release_private)
                lock.fl_ops->fl_release_private(&lock);
-       trace_locks_remove_posix(file_inode(filp), &lock, error);
+       trace_locks_remove_posix(inode, &lock, error);
  }
  
  EXPORT_SYMBOL(locks_remove_posix);
@@@ -2498,12 -2476,12 +2505,12 @@@ locks_remove_flock(struct file *filp, s
                .fl_type = F_UNLCK,
                .fl_end = OFFSET_MAX,
        };
-       struct inode *inode = file_inode(filp);
+       struct inode *inode = locks_inode(filp);
  
        if (list_empty(&flctx->flc_flock))
                return;
  
-       if (filp->f_op->flock)
+       if (filp->f_op->flock && is_remote_lock(filp))
                filp->f_op->flock(filp, F_SETLKW, &fl);
        else
                flock_lock_inode(inode, &fl);
@@@ -2537,7 -2515,7 +2544,7 @@@ void locks_remove_file(struct file *fil
  {
        struct file_lock_context *ctx;
  
-       ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
+       ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
        if (!ctx)
                return;
  
@@@ -2581,7 -2559,7 +2588,7 @@@ EXPORT_SYMBOL(posix_unblock_lock)
   */
  int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
  {
-       if (filp->f_op->lock)
+       if (filp->f_op->lock && is_remote_lock(filp))
                return filp->f_op->lock(filp, F_CANCELLK, fl);
        return 0;
  }
@@@ -2603,24 -2581,13 +2610,24 @@@ static void lock_get_status(struct seq_
        struct inode *inode = NULL;
        unsigned int fl_pid;
  
 -      if (fl->fl_nspid)
 -              fl_pid = pid_vnr(fl->fl_nspid);
 -      else
 +      if (fl->fl_nspid) {
 +              struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
 +
 +              /* Don't let fl_pid change based on who is reading the file */
 +              fl_pid = pid_nr_ns(fl->fl_nspid, proc_pidns);
 +
 +              /*
 +               * If there isn't a fl_pid, don't display who is waiting on
 +               * the lock when called from locks_show; when called from
 +               * __show_fd_info, skip the lock entirely.
 +               */
 +              if (fl_pid == 0)
 +                      return;
 +      } else
                fl_pid = fl->fl_pid;
  
        if (fl->fl_file != NULL)
-               inode = file_inode(fl->fl_file);
+               inode = locks_inode(fl->fl_file);
  
        seq_printf(f, "%lld:%s ", id, pfx);
        if (IS_POSIX(fl)) {
@@@ -2688,13 -2655,9 +2695,13 @@@ static int locks_show(struct seq_file *
  {
        struct locks_iterator *iter = f->private;
        struct file_lock *fl, *bfl;
 +      struct pid_namespace *proc_pidns = file_inode(f->file)->i_sb->s_fs_info;
  
        fl = hlist_entry(v, struct file_lock, fl_link);
  
 +      if (fl->fl_nspid && !pid_nr_ns(fl->fl_nspid, proc_pidns))
 +              return 0;
 +
        lock_get_status(f, fl, iter->li_pos, "");
  
        list_for_each_entry(bfl, &fl->fl_block, fl_block)
@@@ -2726,7 -2689,7 +2733,7 @@@ static void __show_fd_locks(struct seq_
  void show_fd_locks(struct seq_file *f,
                  struct file *filp, struct files_struct *files)
  {
-       struct inode *inode = file_inode(filp);
+       struct inode *inode = locks_inode(filp);
        struct file_lock_context *ctx;
        int id = 0;
  
@@@ -2747,9 -2710,9 +2754,9 @@@ static void *locks_start(struct seq_fil
        struct locks_iterator *iter = f->private;
  
        iter->li_pos = *pos + 1;
 -      lg_global_lock(&file_lock_lglock);
 +      percpu_down_write(&file_rwsem);
        spin_lock(&blocked_lock_lock);
 -      return seq_hlist_start_percpu(&file_lock_list, &iter->li_cpu, *pos);
 +      return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
  }
  
  static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
        struct locks_iterator *iter = f->private;
  
        ++iter->li_pos;
 -      return seq_hlist_next_percpu(v, &file_lock_list, &iter->li_cpu, pos);
 +      return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
  }
  
  static void locks_stop(struct seq_file *f, void *v)
        __releases(&blocked_lock_lock)
  {
        spin_unlock(&blocked_lock_lock);
 -      lg_global_unlock(&file_lock_lglock);
 +      percpu_up_write(&file_rwsem);
  }
  
  static const struct seq_operations locks_seq_operations = {
@@@ -2805,13 -2768,10 +2812,13 @@@ static int __init filelock_init(void
        filelock_cache = kmem_cache_create("file_lock_cache",
                        sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
  
 -      lg_lock_init(&file_lock_lglock, "file_lock_lglock");
  
 -      for_each_possible_cpu(i)
 -              INIT_HLIST_HEAD(per_cpu_ptr(&file_lock_list, i));
 +      for_each_possible_cpu(i) {
 +              struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
 +
 +              spin_lock_init(&fll->lock);
 +              INIT_HLIST_HEAD(&fll->hlist);
 +      }
  
        return 0;
  }
diff --combined fs/namespace.c
@@@ -27,9 -27,6 +27,9 @@@
  #include "pnode.h"
  #include "internal.h"
  
 +/* Maximum number of mounts in a mount namespace */
 +unsigned int sysctl_mount_max __read_mostly = 100000;
 +
  static unsigned int m_hash_mask __read_mostly;
  static unsigned int m_hash_shift __read_mostly;
  static unsigned int mp_hash_mask __read_mostly;
@@@ -902,9 -899,6 +902,9 @@@ static void commit_tree(struct mount *m
  
        list_splice(&head, n->list.prev);
  
 +      n->mounts += n->pending_mounts;
 +      n->pending_mounts = 0;
 +
        attach_shadowed(mnt, parent, shadows);
        touch_mnt_namespace(n);
  }
@@@ -1425,16 -1419,11 +1425,16 @@@ static void umount_tree(struct mount *m
                propagate_umount(&tmp_list);
  
        while (!list_empty(&tmp_list)) {
 +              struct mnt_namespace *ns;
                bool disconnect;
                p = list_first_entry(&tmp_list, struct mount, mnt_list);
                list_del_init(&p->mnt_expire);
                list_del_init(&p->mnt_list);
 -              __touch_mnt_namespace(p->mnt_ns);
 +              ns = p->mnt_ns;
 +              if (ns) {
 +                      ns->mounts--;
 +                      __touch_mnt_namespace(ns);
 +              }
                p->mnt_ns = NULL;
                if (how & UMOUNT_SYNC)
                        p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@@ -1851,28 -1840,6 +1851,28 @@@ static int invent_group_ids(struct moun
        return 0;
  }
  
 +int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
 +{
 +      unsigned int max = READ_ONCE(sysctl_mount_max);
 +      unsigned int mounts = 0, old, pending, sum;
 +      struct mount *p;
 +
 +      for (p = mnt; p; p = next_mnt(p, mnt))
 +              mounts++;
 +
 +      old = ns->mounts;
 +      pending = ns->pending_mounts;
 +      sum = old + pending;
 +      if ((old > sum) ||
 +          (pending > sum) ||
 +          (max < sum) ||
 +          (mounts > (max - sum)))
 +              return -ENOSPC;
 +
 +      ns->pending_mounts = pending + mounts;
 +      return 0;
 +}
 +
  /*
   *  @source_mnt : mount tree to be attached
   *  @nd         : place the mount tree @source_mnt is attached
@@@ -1942,18 -1909,10 +1942,18 @@@ static int attach_recursive_mnt(struct 
                        struct path *parent_path)
  {
        HLIST_HEAD(tree_list);
 +      struct mnt_namespace *ns = dest_mnt->mnt_ns;
        struct mount *child, *p;
        struct hlist_node *n;
        int err;
  
 +      /* Is there space to add these mounts to the mount namespace? */
 +      if (!parent_path) {
 +              err = count_mounts(ns, source_mnt);
 +              if (err)
 +                      goto out;
 +      }
 +
        if (IS_MNT_SHARED(dest_mnt)) {
                err = invent_group_ids(source_mnt, true);
                if (err)
   out_cleanup_ids:
        while (!hlist_empty(&tree_list)) {
                child = hlist_entry(tree_list.first, struct mount, mnt_hash);
 +              child->mnt_parent->mnt_ns->pending_mounts = 0;
                umount_tree(child, UMOUNT_SYNC);
        }
        unlock_mount_hash();
        cleanup_group_ids(source_mnt, NULL);
   out:
 +      ns->pending_mounts = 0;
        return err;
  }
  
@@@ -2743,7 -2700,7 +2743,7 @@@ long do_mount(const char *dev_name, con
  
        flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
                   MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
-                  MS_STRICTATIME);
+                  MS_STRICTATIME | MS_NOREMOTELOCK);
  
        if (flags & MS_REMOUNT)
                retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
@@@ -2762,20 -2719,9 +2762,20 @@@ dput_out
        return retval;
  }
  
 +static struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
 +{
 +      return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
 +}
 +
 +static void dec_mnt_namespaces(struct ucounts *ucounts)
 +{
 +      dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
 +}
 +
  static void free_mnt_ns(struct mnt_namespace *ns)
  {
        ns_free_inum(&ns->ns);
 +      dec_mnt_namespaces(ns->ucounts);
        put_user_ns(ns->user_ns);
        kfree(ns);
  }
@@@ -2792,22 -2738,14 +2792,22 @@@ static atomic64_t mnt_ns_seq = ATOMIC64
  static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
  {
        struct mnt_namespace *new_ns;
 +      struct ucounts *ucounts;
        int ret;
  
 +      ucounts = inc_mnt_namespaces(user_ns);
 +      if (!ucounts)
 +              return ERR_PTR(-ENOSPC);
 +
        new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
 -      if (!new_ns)
 +      if (!new_ns) {
 +              dec_mnt_namespaces(ucounts);
                return ERR_PTR(-ENOMEM);
 +      }
        ret = ns_alloc_inum(&new_ns->ns);
        if (ret) {
                kfree(new_ns);
 +              dec_mnt_namespaces(ucounts);
                return ERR_PTR(ret);
        }
        new_ns->ns.ops = &mntns_operations;
        init_waitqueue_head(&new_ns->poll);
        new_ns->event = 0;
        new_ns->user_ns = get_user_ns(user_ns);
 +      new_ns->ucounts = ucounts;
 +      new_ns->mounts = 0;
 +      new_ns->pending_mounts = 0;
        return new_ns;
  }
  
@@@ -2870,7 -2805,6 +2870,7 @@@ struct mnt_namespace *copy_mnt_ns(unsig
        q = new;
        while (p) {
                q->mnt_ns = new_ns;
 +              new_ns->mounts++;
                if (new_fs) {
                        if (&p->mnt == new_fs->root.mnt) {
                                new_fs->root.mnt = mntget(&q->mnt);
@@@ -2909,7 -2843,6 +2909,7 @@@ static struct mnt_namespace *create_mnt
                struct mount *mnt = real_mount(m);
                mnt->mnt_ns = new_ns;
                new_ns->root = mnt;
 +              new_ns->mounts++;
                list_add(&mnt->mnt_list, &new_ns->list);
        } else {
                mntput(m);
@@@ -3415,16 -3348,10 +3415,16 @@@ static int mntns_install(struct nsprox
        return 0;
  }
  
 +static struct user_namespace *mntns_owner(struct ns_common *ns)
 +{
 +      return to_mnt_ns(ns)->user_ns;
 +}
 +
  const struct proc_ns_operations mntns_operations = {
        .name           = "mnt",
        .type           = CLONE_NEWNS,
        .get            = mntns_get,
        .put            = mntns_put,
        .install        = mntns_install,
 +      .owner          = mntns_owner,
  };
diff --combined fs/ocfs2/dlmfs/dlmfs.c
@@@ -211,7 -211,7 +211,7 @@@ static int dlmfs_file_setattr(struct de
        struct inode *inode = d_inode(dentry);
  
        attr->ia_valid &= ~ATTR_SIZE;
-       error = inode_change_ok(inode, attr);
+       error = setattr_prepare(dentry, attr);
        if (error)
                return error;
  
@@@ -646,7 -646,7 +646,7 @@@ static int __init init_dlmfs_fs(void
        }
        cleanup_inode = 1;
  
 -      user_dlm_worker = create_singlethread_workqueue("user_dlm");
 +      user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
        if (!user_dlm_worker) {
                status = -ENOMEM;
                goto bail;
diff --combined fs/ocfs2/file.c
@@@ -1155,7 -1155,7 +1155,7 @@@ int ocfs2_setattr(struct dentry *dentry
        if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
                return 0;
  
-       status = inode_change_ok(inode, attr);
+       status = setattr_prepare(dentry, attr);
        if (status)
                return status;
  
@@@ -2321,6 -2321,36 +2321,6 @@@ out_mutex
        return ret;
  }
  
 -static ssize_t ocfs2_file_splice_read(struct file *in,
 -                                    loff_t *ppos,
 -                                    struct pipe_inode_info *pipe,
 -                                    size_t len,
 -                                    unsigned int flags)
 -{
 -      int ret = 0, lock_level = 0;
 -      struct inode *inode = file_inode(in);
 -
 -      trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry,
 -                      (unsigned long long)OCFS2_I(inode)->ip_blkno,
 -                      in->f_path.dentry->d_name.len,
 -                      in->f_path.dentry->d_name.name, len);
 -
 -      /*
 -       * See the comment in ocfs2_file_read_iter()
 -       */
 -      ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level);
 -      if (ret < 0) {
 -              mlog_errno(ret);
 -              goto bail;
 -      }
 -      ocfs2_inode_unlock(inode, lock_level);
 -
 -      ret = generic_file_splice_read(in, ppos, pipe, len, flags);
 -
 -bail:
 -      return ret;
 -}
 -
  static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
                                   struct iov_iter *to)
  {
@@@ -2479,7 -2509,7 +2479,7 @@@ const struct file_operations ocfs2_fop
  #endif
        .lock           = ocfs2_lock,
        .flock          = ocfs2_flock,
 -      .splice_read    = ocfs2_file_splice_read,
 +      .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = ocfs2_fallocate,
  };
@@@ -2524,7 -2554,7 +2524,7 @@@ const struct file_operations ocfs2_fops
        .compat_ioctl   = ocfs2_compat_ioctl,
  #endif
        .flock          = ocfs2_flock,
 -      .splice_read    = ocfs2_file_splice_read,
 +      .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = ocfs2_fallocate,
  };
diff --combined fs/orangefs/file.c
  #include <linux/fs.h>
  #include <linux/pagemap.h>
  
 +static int flush_racache(struct inode *inode)
 +{
 +      struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
 +      struct orangefs_kernel_op_s *new_op;
 +      int ret;
 +
 +      gossip_debug(GOSSIP_UTILS_DEBUG,
 +          "%s: %pU: Handle is %pU | fs_id %d\n", __func__,
 +          get_khandle_from_ino(inode), &orangefs_inode->refn.khandle,
 +          orangefs_inode->refn.fs_id);
 +
 +      new_op = op_alloc(ORANGEFS_VFS_OP_RA_FLUSH);
 +      if (!new_op)
 +              return -ENOMEM;
 +      new_op->upcall.req.ra_cache_flush.refn = orangefs_inode->refn;
 +
 +      ret = service_operation(new_op, "orangefs_flush_racache",
 +          get_interruptible_flag(inode));
 +
 +      gossip_debug(GOSSIP_UTILS_DEBUG, "%s: got return value of %d\n",
 +          __func__, ret);
 +
 +      op_release(new_op);
 +      return ret;
 +}
 +
  /*
   * Copy to client-core's address space from the buffers specified
   * by the iovec upto total_size bytes.
@@@ -412,7 -386,7 +412,7 @@@ ssize_t orangefs_inode_read(struct inod
        size_t bufmap_size;
        ssize_t ret = -EINVAL;
  
 -      g_orangefs_stats.reads++;
 +      orangefs_stats.reads++;
  
        bufmap_size = orangefs_bufmap_size_query();
        if (count > bufmap_size) {
@@@ -453,7 -427,7 +453,7 @@@ static ssize_t orangefs_file_read_iter(
  
        gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n");
  
 -      g_orangefs_stats.reads++;
 +      orangefs_stats.reads++;
  
        rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter);
        iocb->ki_pos = pos;
@@@ -514,7 -488,7 +514,7 @@@ static ssize_t orangefs_file_write_iter
        }
  
        iocb->ki_pos = pos;
 -      g_orangefs_stats.writes++;
 +      orangefs_stats.writes++;
  
  out:
  
@@@ -611,30 -585,21 +611,30 @@@ static int orangefs_file_mmap(struct fi
  static int orangefs_file_release(struct inode *inode, struct file *file)
  {
        gossip_debug(GOSSIP_FILE_DEBUG,
-                    "orangefs_file_release: called on %s\n",
-                    file->f_path.dentry->d_name.name);
+                    "orangefs_file_release: called on %pD\n",
+                    file);
  
        orangefs_flush_inode(inode);
  
        /*
 -       * remove all associated inode pages from the page cache and mmap
 +       * remove all associated inode pages from the page cache and
         * readahead cache (if any); this forces an expensive refresh of
         * data for the next caller of mmap (or 'get_block' accesses)
         */
        if (file->f_path.dentry->d_inode &&
            file->f_path.dentry->d_inode->i_mapping &&
 -          mapping_nrpages(&file->f_path.dentry->d_inode->i_data))
 +          mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) {
 +              if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
 +                      gossip_debug(GOSSIP_INODE_DEBUG,
 +                          "calling flush_racache on %pU\n",
 +                          get_khandle_from_ino(inode));
 +                      flush_racache(inode);
 +                      gossip_debug(GOSSIP_INODE_DEBUG,
 +                          "flush_racache finished\n");
 +              }
                truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
                                     0);
 +      }
        return 0;
  }
  
diff --combined fs/orangefs/namei.c
@@@ -24,9 -24,9 +24,9 @@@ static int orangefs_create(struct inod
        struct inode *inode;
        int ret;
  
-       gossip_debug(GOSSIP_NAME_DEBUG, "%s: %s\n",
+       gossip_debug(GOSSIP_NAME_DEBUG, "%s: %pd\n",
                     __func__,
-                    dentry->d_name.name);
+                    dentry);
  
        new_op = op_alloc(ORANGEFS_VFS_OP_CREATE);
        if (!new_op)
@@@ -43,9 -43,9 +43,9 @@@
        ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
  
        gossip_debug(GOSSIP_NAME_DEBUG,
-                    "%s: %s: handle:%pU: fsid:%d: new_op:%p: ret:%d:\n",
+                    "%s: %pd: handle:%pU: fsid:%d: new_op:%p: ret:%d:\n",
                     __func__,
-                    dentry->d_name.name,
+                    dentry,
                     &new_op->downcall.resp.create.refn.khandle,
                     new_op->downcall.resp.create.refn.fs_id,
                     new_op,
        inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
                                &new_op->downcall.resp.create.refn);
        if (IS_ERR(inode)) {
-               gossip_err("%s: Failed to allocate inode for file :%s:\n",
+               gossip_err("%s: Failed to allocate inode for file :%pd:\n",
                           __func__,
-                          dentry->d_name.name);
+                          dentry);
                ret = PTR_ERR(inode);
                goto out;
        }
  
        gossip_debug(GOSSIP_NAME_DEBUG,
-                    "%s: Assigned inode :%pU: for file :%s:\n",
+                    "%s: Assigned inode :%pU: for file :%pd:\n",
                     __func__,
                     get_khandle_from_ino(inode),
-                    dentry->d_name.name);
+                    dentry);
  
        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
 -      dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
 +      dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
        ORANGEFS_I(inode)->getattr_time = jiffies - 1;
  
        gossip_debug(GOSSIP_NAME_DEBUG,
-                    "%s: dentry instantiated for %s\n",
+                    "%s: dentry instantiated for %pd\n",
                     __func__,
-                    dentry->d_name.name);
+                    dentry);
  
        SetMtimeFlag(parent);
        dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
@@@ -87,9 -87,9 +87,9 @@@
  out:
        op_release(new_op);
        gossip_debug(GOSSIP_NAME_DEBUG,
-                    "%s: %s: returning %d\n",
+                    "%s: %pd: returning %d\n",
                     __func__,
-                    dentry->d_name.name,
+                    dentry,
                     ret);
        return ret;
  }
@@@ -115,8 -115,8 +115,8 @@@ static struct dentry *orangefs_lookup(s
         * -EEXIST on O_EXCL opens, which is broken if we skip this lookup
         * in the create path)
         */
-       gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
-                    __func__, dentry->d_name.name);
+       gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %pd\n",
+                    __func__, dentry);
  
        if (dentry->d_name.len > (ORANGEFS_NAME_MAX - 1))
                return ERR_PTR(-ENAMETOOLONG);
  
                        gossip_debug(GOSSIP_NAME_DEBUG,
                                     "orangefs_lookup: Adding *negative* dentry "
-                                    "%p for %s\n",
+                                    "%p for %pd\n",
                                     dentry,
-                                    dentry->d_name.name);
+                                    dentry);
  
                        d_add(dentry, NULL);
                        res = NULL;
                goto out;
        }
  
 -      dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
 +      dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
  
        inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
        if (IS_ERR(inode)) {
@@@ -224,10 -224,10 +224,10 @@@ static int orangefs_unlink(struct inod
        int ret;
  
        gossip_debug(GOSSIP_NAME_DEBUG,
-                    "%s: called on %s\n"
+                    "%s: called on %pd\n"
                     "  (inode %pU): Parent is %pU | fs_id %d\n",
                     __func__,
-                    dentry->d_name.name,
+                    dentry,
                     get_khandle_from_ino(inode),
                     &parent->refn.khandle,
                     parent->refn.fs_id);
@@@ -322,13 -322,13 +322,13 @@@ static int orangefs_symlink(struct inod
  
        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
 -      dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
 +      dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
        ORANGEFS_I(inode)->getattr_time = jiffies - 1;
  
        gossip_debug(GOSSIP_NAME_DEBUG,
-                    "Inode (Symlink) %pU -> %s\n",
+                    "Inode (Symlink) %pU -> %pd\n",
                     get_khandle_from_ino(inode),
-                    dentry->d_name.name);
+                    dentry);
  
        SetMtimeFlag(parent);
        dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
@@@ -386,13 -386,13 +386,13 @@@ static int orangefs_mkdir(struct inode 
  
        d_instantiate(dentry, inode);
        unlock_new_inode(inode);
 -      dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
 +      dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
        ORANGEFS_I(inode)->getattr_time = jiffies - 1;
  
        gossip_debug(GOSSIP_NAME_DEBUG,
-                    "Inode (Directory) %pU -> %s\n",
+                    "Inode (Directory) %pU -> %pd\n",
                     get_khandle_from_ino(inode),
-                    dentry->d_name.name);
+                    dentry);
  
        /*
         * NOTE: we have no good way to keep nlink consistent for directories
  #include "protocol.h"
  #include "orangefs-kernel.h"
  
 -static int orangefs_debug_disabled = 1;
 -
 -static int orangefs_debug_help_open(struct inode *, struct file *);
 +#define DEBUG_HELP_STRING_SIZE 4096
 +#define HELP_STRING_UNINITIALIZED \
 +      "Client Debug Keywords are unknown until the first time\n" \
 +      "the client is started after boot.\n"
 +#define ORANGEFS_KMOD_DEBUG_HELP_FILE "debug-help"
 +#define ORANGEFS_KMOD_DEBUG_FILE "kernel-debug"
 +#define ORANGEFS_CLIENT_DEBUG_FILE "client-debug"
 +#define ORANGEFS_VERBOSE "verbose"
 +#define ORANGEFS_ALL "all"
  
 -const struct file_operations debug_help_fops = {
 -      .open           = orangefs_debug_help_open,
 -      .read           = seq_read,
 -      .release        = seq_release,
 -      .llseek         = seq_lseek,
 +/*
 + * An array of client_debug_mask will be built to hold debug keyword/mask
 + * values fetched from userspace.
 + *
 + * keyword: NUL-terminated keyword name; array entries get their own
 + *          kzalloc'ed copy in orangefs_prepare_cdm_array().
 + * mask1, mask2: the two 64-bit mask words that follow the keyword on
 + *          its line of the client's debug array string (read as hex).
 + */
 +struct client_debug_mask {
 +      char *keyword;
 +      __u64 mask1;
 +      __u64 mask2;
  };
  
 +static int orangefs_kernel_debug_init(void);
 +
 +static int orangefs_debug_help_open(struct inode *, struct file *);
  static void *help_start(struct seq_file *, loff_t *);
  static void *help_next(struct seq_file *, void *, loff_t *);
  static void help_stop(struct seq_file *, void *);
  static int help_show(struct seq_file *, void *);
  
 -static const struct seq_operations help_debug_ops = {
 -      .start  = help_start,
 -      .next   = help_next,
 -      .stop   = help_stop,
 -      .show   = help_show,
 -};
 -
 -/*
 - * Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and
 - * ORANGEFS_KMOD_DEBUG_FILE.
 - */
 -static DEFINE_MUTEX(orangefs_debug_lock);
 -
 -int orangefs_debug_open(struct inode *, struct file *);
 +static int orangefs_debug_open(struct inode *, struct file *);
  
  static ssize_t orangefs_debug_read(struct file *,
                                 char __user *,
@@@ -83,43 -84,6 +83,43 @@@ static ssize_t orangefs_debug_write(str
                                  size_t,
                                  loff_t *);
  
 +static int orangefs_prepare_cdm_array(char *);
 +static void debug_mask_to_string(void *, int);
 +static void do_k_string(void *, int);
 +static void do_c_string(void *, int);
 +static int keyword_is_amalgam(char *);
 +static int check_amalgam_keyword(void *, int);
 +static void debug_string_to_mask(char *, void *, int);
 +static void do_c_mask(int, char *, struct client_debug_mask **);
 +static void do_k_mask(int, char *, __u64 **);
 +
 +static char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN] = "none";
 +static char *debug_help_string;
 +static char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
 +static char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
 +
 +static struct dentry *help_file_dentry;
 +static struct dentry *client_debug_dentry;
 +static struct dentry *debug_dir;
 +
 +static unsigned int kernel_mask_set_mod_init;
 +static int orangefs_debug_disabled = 1;
 +static int help_string_initialized;
 +
 +static const struct seq_operations help_debug_ops = {
 +      .start  = help_start,
 +      .next   = help_next,
 +      .stop   = help_stop,
 +      .show   = help_show,
 +};
 +
 +const struct file_operations debug_help_fops = {
 +      .open           = orangefs_debug_help_open,
 +      .read           = seq_read,
 +      .release        = seq_release,
 +      .llseek         = seq_lseek,
 +};
 +
  static const struct file_operations kernel_debug_fops = {
        .open           = orangefs_debug_open,
        .read           = orangefs_debug_read,
        .llseek         = generic_file_llseek,
  };
  
 +static int client_all_index;
 +static int client_verbose_index;
 +
 +static struct client_debug_mask *cdm_array;
 +static int cdm_element_count;
 +
 +static struct client_debug_mask client_debug_mask;
 +
 +/*
 + * Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and
 + * ORANGEFS_CLIENT_DEBUG_FILE.
 + */
 +static DEFINE_MUTEX(orangefs_debug_lock);
 +
  /*
   * initialize kmod debug operations, create orangefs debugfs dir and
   * ORANGEFS_KMOD_DEBUG_HELP_FILE.
   */
 -int orangefs_debugfs_init(void)
 +int orangefs_debugfs_init(int debug_mask)
  {
 -
        int rc = -ENOMEM;
  
 +      /* convert input debug mask to a 64-bit unsigned integer */
 +        orangefs_gossip_debug_mask = (unsigned long long)debug_mask;
 +
 +      /*
 +       * set the kernel's gossip debug string; invalid mask values will
 +       * be ignored.
 +       */
 +      debug_mask_to_string(&orangefs_gossip_debug_mask, 0);
 +
 +      /* remove any invalid values from the mask */
 +      debug_string_to_mask(kernel_debug_string, &orangefs_gossip_debug_mask,
 +          0);
 +
 +      /*
 +       * if the mask has a non-zero value, then indicate that the mask
 +       * was set when the kernel module was loaded.  The orangefs dev ioctl
 +       * command will look at this boolean to determine if the kernel's
 +       * debug mask should be overwritten when the client-core is started.
 +       */
 +      if (orangefs_gossip_debug_mask != 0)
 +              kernel_mask_set_mod_init = true;
 +
 +      pr_info("%s: called with debug mask: :%s: :%llx:\n",
 +              __func__,
 +              kernel_debug_string,
 +              (unsigned long long)orangefs_gossip_debug_mask);
 +
        debug_dir = debugfs_create_dir("orangefs", NULL);
        if (!debug_dir) {
                pr_info("%s: debugfs_create_dir failed.\n", __func__);
        }
  
        orangefs_debug_disabled = 0;
 +
 +      rc = orangefs_kernel_debug_init();
 +
 +out:
 +
 +      return rc;
 +}
 +
 +/*
 + * initialize the kernel-debug file.
 + */
 +static int orangefs_kernel_debug_init(void)
 +{
 +      int rc = -ENOMEM;
 +      struct dentry *ret;
 +      char *k_buffer = NULL;
 +
 +      gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
 +
 +      k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
 +      if (!k_buffer)
 +              goto out;
 +
 +      if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
 +              strcpy(k_buffer, kernel_debug_string);
 +              strcat(k_buffer, "\n");
 +      } else {
 +              strcpy(k_buffer, "none\n");
 +              pr_info("%s: overflow 1!\n", __func__);
 +      }
 +
 +      ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
 +                                0444,
 +                                debug_dir,
 +                                k_buffer,
 +                                &kernel_debug_fops);
 +      if (!ret) {
 +              pr_info("%s: failed to create %s.\n",
 +                      __func__,
 +                      ORANGEFS_KMOD_DEBUG_FILE);
 +              goto out;
 +      }
 +
        rc = 0;
  
  out:
  
 +      gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
        return rc;
  }
  
 +
  void orangefs_debugfs_cleanup(void)
  {
        debugfs_remove_recursive(debug_dir);
@@@ -316,6 -195,49 +316,6 @@@ static int help_show(struct seq_file *m
        return 0;
  }
  
 -/*
 - * initialize the kernel-debug file.
 - */
 -int orangefs_kernel_debug_init(void)
 -{
 -      int rc = -ENOMEM;
 -      struct dentry *ret;
 -      char *k_buffer = NULL;
 -
 -      gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
 -
 -      k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
 -      if (!k_buffer)
 -              goto out;
 -
 -      if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
 -              strcpy(k_buffer, kernel_debug_string);
 -              strcat(k_buffer, "\n");
 -      } else {
 -              strcpy(k_buffer, "none\n");
 -              pr_info("%s: overflow 1!\n", __func__);
 -      }
 -
 -      ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
 -                                0444,
 -                                debug_dir,
 -                                k_buffer,
 -                                &kernel_debug_fops);
 -      if (!ret) {
 -              pr_info("%s: failed to create %s.\n",
 -                      __func__,
 -                      ORANGEFS_KMOD_DEBUG_FILE);
 -              goto out;
 -      }
 -
 -      rc = 0;
 -
 -out:
 -
 -      gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
 -      return rc;
 -}
 -
  /*
   * initialize the client-debug file.
   */
@@@ -360,7 -282,7 +360,7 @@@ out
  }
  
  /* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/
 -int orangefs_debug_open(struct inode *inode, struct file *file)
 +static int orangefs_debug_open(struct inode *inode, struct file *file)
  {
        int rc = -ENODEV;
  
@@@ -428,8 -350,8 +428,8 @@@ static ssize_t orangefs_debug_write(str
        struct client_debug_mask c_mask = { NULL, 0, 0 };
  
        gossip_debug(GOSSIP_DEBUGFS_DEBUG,
-               "orangefs_debug_write: %s\n",
-               file->f_path.dentry->d_name.name);
+               "orangefs_debug_write: %pD\n",
+               file);
  
        /*
         * Thwart users who try to jamb a ridiculous number
         */
        if (!strcmp(file->f_path.dentry->d_name.name,
                    ORANGEFS_KMOD_DEBUG_FILE)) {
 -              debug_string_to_mask(buf, &gossip_debug_mask, 0);
 -              debug_mask_to_string(&gossip_debug_mask, 0);
 +              debug_string_to_mask(buf, &orangefs_gossip_debug_mask, 0);
 +              debug_mask_to_string(&orangefs_gossip_debug_mask, 0);
                debug_string = kernel_debug_string;
                gossip_debug(GOSSIP_DEBUGFS_DEBUG,
                             "New kernel debug string is %s\n",
@@@ -530,546 -452,3 +530,546 @@@ out
        kfree(buf);
        return rc;
  }
 +
 +/*
 + * After obtaining a string representation of the client's debug
 + * keywords and their associated masks, this function is called to build an
 + * array of these values.
 + */
 +static int orangefs_prepare_cdm_array(char *debug_array_string)
 +{
 +      int i;
 +      int rc = -EINVAL;
 +      char *cds_head = NULL;
 +      char *cds_delimiter = NULL;
 +      int keyword_len = 0;
 +
 +      gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
 +
 +      /*
 +       * figure out how many elements the cdm_array needs.
 +       */
 +      for (i = 0; i < strlen(debug_array_string); i++)
 +              if (debug_array_string[i] == '\n')
 +                      cdm_element_count++;
 +
 +      if (!cdm_element_count) {
 +              pr_info("No elements in client debug array string!\n");
 +              goto out;
 +      }
 +
 +      cdm_array =
 +              kzalloc(cdm_element_count * sizeof(struct client_debug_mask),
 +                      GFP_KERNEL);
 +      if (!cdm_array) {
 +              pr_info("malloc failed for cdm_array!\n");
 +              rc = -ENOMEM;
 +              goto out;
 +      }
 +
 +      cds_head = debug_array_string;
 +
 +      for (i = 0; i < cdm_element_count; i++) {
 +              cds_delimiter = strchr(cds_head, '\n');
 +              *cds_delimiter = '\0';
 +
 +              keyword_len = strcspn(cds_head, " ");
 +
 +              cdm_array[i].keyword = kzalloc(keyword_len + 1, GFP_KERNEL);
 +              if (!cdm_array[i].keyword) {
 +                      rc = -ENOMEM;
 +                      goto out;
 +              }
 +
 +              sscanf(cds_head,
 +                     "%s %llx %llx",
 +                     cdm_array[i].keyword,
 +                     (unsigned long long *)&(cdm_array[i].mask1),
 +                     (unsigned long long *)&(cdm_array[i].mask2));
 +
 +              if (!strcmp(cdm_array[i].keyword, ORANGEFS_VERBOSE))
 +                      client_verbose_index = i;
 +
 +              if (!strcmp(cdm_array[i].keyword, ORANGEFS_ALL))
 +                      client_all_index = i;
 +
 +              cds_head = cds_delimiter + 1;
 +      }
 +
 +      rc = cdm_element_count;
 +
 +      gossip_debug(GOSSIP_UTILS_DEBUG, "%s: rc:%d:\n", __func__, rc);
 +
 +out:
 +
 +      return rc;
 +
 +}
 +
 +/*
 + * /sys/kernel/debug/orangefs/debug-help can be catted to
 + * see all the available kernel and client debug keywords.
 + *
 + * When the kernel boots, we have no idea what keywords the
 + * client supports, nor their associated masks.
 + *
 + * We pass through this function once at boot and stamp a
 + * boilerplate "we don't know" message for the client in the
 + * debug-help file. We pass through here again when the client
 + * starts and then we can fill out the debug-help file fully.
 + *
 + * The client might be restarted any number of times between
 + * reboots, we only build the debug-help file the first time.
 + */
 +int orangefs_prepare_debugfs_help_string(int at_boot)
 +{
 +      int rc = -EINVAL;
 +      int i;
 +      int byte_count = 0;
 +      char *client_title = "Client Debug Keywords:\n";
 +      char *kernel_title = "Kernel Debug Keywords:\n";
 +
 +      gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
 +
 +      if (at_boot) {
 +              byte_count += strlen(HELP_STRING_UNINITIALIZED);
 +              client_title = HELP_STRING_UNINITIALIZED;
 +      } else {
 +              /*
 +               * fill the client keyword/mask array and remember
 +               * how many elements there were.
 +               */
 +              cdm_element_count =
 +                      orangefs_prepare_cdm_array(client_debug_array_string);
 +              if (cdm_element_count <= 0)
 +                      goto out;
 +
 +              /* Count the bytes destined for debug_help_string. */
 +              byte_count += strlen(client_title);
 +
 +              for (i = 0; i < cdm_element_count; i++) {
 +                      byte_count += strlen(cdm_array[i].keyword + 2);
 +                      if (byte_count >= DEBUG_HELP_STRING_SIZE) {
 +                              pr_info("%s: overflow 1!\n", __func__);
 +                              goto out;
 +                      }
 +              }
 +
 +              gossip_debug(GOSSIP_UTILS_DEBUG,
 +                           "%s: cdm_element_count:%d:\n",
 +                           __func__,
 +                           cdm_element_count);
 +      }
 +
 +      byte_count += strlen(kernel_title);
 +      for (i = 0; i < num_kmod_keyword_mask_map; i++) {
 +              byte_count +=
 +                      strlen(s_kmod_keyword_mask_map[i].keyword + 2);
 +              if (byte_count >= DEBUG_HELP_STRING_SIZE) {
 +                      pr_info("%s: overflow 2!\n", __func__);
 +                      goto out;
 +              }
 +      }
 +
 +      /* build debug_help_string. */
 +      debug_help_string = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
 +      if (!debug_help_string) {
 +              rc = -ENOMEM;
 +              goto out;
 +      }
 +
 +      strcat(debug_help_string, client_title);
 +
 +      if (!at_boot) {
 +              for (i = 0; i < cdm_element_count; i++) {
 +                      strcat(debug_help_string, "\t");
 +                      strcat(debug_help_string, cdm_array[i].keyword);
 +                      strcat(debug_help_string, "\n");
 +              }
 +      }
 +
 +      strcat(debug_help_string, "\n");
 +      strcat(debug_help_string, kernel_title);
 +
 +      for (i = 0; i < num_kmod_keyword_mask_map; i++) {
 +              strcat(debug_help_string, "\t");
 +              strcat(debug_help_string, s_kmod_keyword_mask_map[i].keyword);
 +              strcat(debug_help_string, "\n");
 +      }
 +
 +      rc = 0;
 +
 +out:
 +
 +      return rc;
 +
 +}
 +
 +/*
 + * kernel = type 0
 + * client = type 1
 + */
 +static void debug_mask_to_string(void *mask, int type)
 +{
 +      int i;
 +      int len = 0;
 +      char *debug_string;
 +      int element_count = 0;
 +
 +      gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
 +
 +      if (type) {
 +              debug_string = client_debug_string;
 +              element_count = cdm_element_count;
 +      } else {
 +              debug_string = kernel_debug_string;
 +              element_count = num_kmod_keyword_mask_map;
 +      }
 +
 +      memset(debug_string, 0, ORANGEFS_MAX_DEBUG_STRING_LEN);
 +
 +      /*
 +       * Some keywords, like "all" or "verbose", are amalgams of
 +       * numerous other keywords. Make a special check for those
 +       * before grinding through the whole mask only to find out
 +       * later...
 +       */
 +      if (check_amalgam_keyword(mask, type))
 +              goto out;
 +
 +      /* Build the debug string. */
 +      for (i = 0; i < element_count; i++)
 +              if (type)
 +                      do_c_string(mask, i);
 +              else
 +                      do_k_string(mask, i);
 +
 +      len = strlen(debug_string);
 +
 +      if ((len) && (type))
 +              client_debug_string[len - 1] = '\0';
 +      else if (len)
 +              kernel_debug_string[len - 1] = '\0';
 +      else if (type)
 +              strcpy(client_debug_string, "none");
 +      else
 +              strcpy(kernel_debug_string, "none");
 +
 +out:
 +gossip_debug(GOSSIP_UTILS_DEBUG, "%s: string:%s:\n", __func__, debug_string);
 +
 +      return;
 +
 +}
 +
 +static void do_k_string(void *k_mask, int index)
 +{
 +      __u64 *mask = (__u64 *) k_mask;
 +
 +      if (keyword_is_amalgam((char *) s_kmod_keyword_mask_map[index].keyword))
 +              goto out;
 +
 +      if (*mask & s_kmod_keyword_mask_map[index].mask_val) {
 +              if ((strlen(kernel_debug_string) +
 +                   strlen(s_kmod_keyword_mask_map[index].keyword))
 +                      < ORANGEFS_MAX_DEBUG_STRING_LEN - 1) {
 +                              strcat(kernel_debug_string,
 +                                     s_kmod_keyword_mask_map[index].keyword);
 +                              strcat(kernel_debug_string, ",");
 +                      } else {
 +                              gossip_err("%s: overflow!\n", __func__);
 +                              strcpy(kernel_debug_string, ORANGEFS_ALL);
 +                              goto out;
 +                      }
 +      }
 +
 +out:
 +
 +      return;
 +}
 +
 +static void do_c_string(void *c_mask, int index)
 +{
 +      struct client_debug_mask *mask = (struct client_debug_mask *) c_mask;
 +
 +      if (keyword_is_amalgam(cdm_array[index].keyword))
 +              goto out;
 +
 +      if ((mask->mask1 & cdm_array[index].mask1) ||
 +          (mask->mask2 & cdm_array[index].mask2)) {
 +              if ((strlen(client_debug_string) +
 +                   strlen(cdm_array[index].keyword) + 1)
 +                      < ORANGEFS_MAX_DEBUG_STRING_LEN - 2) {
 +                              strcat(client_debug_string,
 +                                     cdm_array[index].keyword);
 +                              strcat(client_debug_string, ",");
 +                      } else {
 +                              gossip_err("%s: overflow!\n", __func__);
 +                              strcpy(client_debug_string, ORANGEFS_ALL);
 +                              goto out;
 +                      }
 +      }
 +out:
 +      return;
 +}
 +
 +static int keyword_is_amalgam(char *keyword)
 +{
 +      int rc = 0;
 +
 +      if ((!strcmp(keyword, ORANGEFS_ALL)) || (!strcmp(keyword, ORANGEFS_VERBOSE)))
 +              rc = 1;
 +
 +      return rc;
 +}
 +
 +/*
 + * kernel = type 0
 + * client = type 1
 + *
 + * return 1 if we found an amalgam.
 + */
 +static int check_amalgam_keyword(void *mask, int type)
 +{
 +      __u64 *k_mask;
 +      struct client_debug_mask *c_mask;
 +      int k_all_index = num_kmod_keyword_mask_map - 1;
 +      int rc = 0;
 +
 +      if (type) {
 +              c_mask = (struct client_debug_mask *) mask;
 +
 +              if ((c_mask->mask1 == cdm_array[client_all_index].mask1) &&
 +                  (c_mask->mask2 == cdm_array[client_all_index].mask2)) {
 +                      strcpy(client_debug_string, ORANGEFS_ALL);
 +                      rc = 1;
 +                      goto out;
 +              }
 +
 +              if ((c_mask->mask1 == cdm_array[client_verbose_index].mask1) &&
 +                  (c_mask->mask2 == cdm_array[client_verbose_index].mask2)) {
 +                      strcpy(client_debug_string, ORANGEFS_VERBOSE);
 +                      rc = 1;
 +                      goto out;
 +              }
 +
 +      } else {
 +              k_mask = (__u64 *) mask;
 +
 +              if (*k_mask >= s_kmod_keyword_mask_map[k_all_index].mask_val) {
 +                      strcpy(kernel_debug_string, ORANGEFS_ALL);
 +                      rc = 1;
 +                      goto out;
 +              }
 +      }
 +
 +out:
 +
 +      return rc;
 +}
 +
 +/*
 + * kernel = type 0
 + * client = type 1
 + */
 +static void debug_string_to_mask(char *debug_string, void *mask, int type)
 +{
 +      char *unchecked_keyword;
 +      int i;
 +      char *strsep_fodder = kstrdup(debug_string, GFP_KERNEL);
 +      char *original_pointer;
 +      int element_count = 0;
 +      struct client_debug_mask *c_mask = NULL;
 +      __u64 *k_mask = NULL;
 +
 +      gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
 +
 +      if (type) {
 +              c_mask = (struct client_debug_mask *)mask;
 +              element_count = cdm_element_count;
 +      } else {
 +              k_mask = (__u64 *)mask;
 +              *k_mask = 0;
 +              element_count = num_kmod_keyword_mask_map;
 +      }
 +
 +      original_pointer = strsep_fodder;
 +      while ((unchecked_keyword = strsep(&strsep_fodder, ",")))
 +              if (strlen(unchecked_keyword)) {
 +                      for (i = 0; i < element_count; i++)
 +                              if (type)
 +                                      do_c_mask(i,
 +                                                unchecked_keyword,
 +                                                &c_mask);
 +                              else
 +                                      do_k_mask(i,
 +                                                unchecked_keyword,
 +                                                &k_mask);
 +              }
 +
 +      kfree(original_pointer);
 +}
 +
 +static void do_c_mask(int i, char *unchecked_keyword,
 +    struct client_debug_mask **sane_mask)
 +{
 +
 +      if (!strcmp(cdm_array[i].keyword, unchecked_keyword)) {
 +              (**sane_mask).mask1 = (**sane_mask).mask1 | cdm_array[i].mask1;
 +              (**sane_mask).mask2 = (**sane_mask).mask2 | cdm_array[i].mask2;
 +      }
 +}
 +
 +static void do_k_mask(int i, char *unchecked_keyword, __u64 **sane_mask)
 +{
 +
 +      if (!strcmp(s_kmod_keyword_mask_map[i].keyword, unchecked_keyword))
 +              **sane_mask = (**sane_mask) |
 +                              s_kmod_keyword_mask_map[i].mask_val;
 +}
 +
 +int orangefs_debugfs_new_client_mask(void __user *arg)
 +{
 +      struct dev_mask2_info_s mask2_info = {0};
 +      int ret;
 +
 +      ret = copy_from_user(&mask2_info,
 +                           (void __user *)arg,
 +                           sizeof(struct dev_mask2_info_s));
 +
 +      if (ret != 0)
 +              return -EIO;
 +
 +      client_debug_mask.mask1 = mask2_info.mask1_value;
 +      client_debug_mask.mask2 = mask2_info.mask2_value;
 +
 +      pr_info("%s: client debug mask has been been received "
 +              ":%llx: :%llx:\n",
 +              __func__,
 +              (unsigned long long)client_debug_mask.mask1,
 +              (unsigned long long)client_debug_mask.mask2);
 +
 +      return ret;
 +}
 +
 +int orangefs_debugfs_new_client_string(void __user *arg) 
 +{
 +      int ret;
 +
 +      ret = copy_from_user(&client_debug_array_string,
 +                                     (void __user *)arg,
 +                                     ORANGEFS_MAX_DEBUG_STRING_LEN);
 +      if (ret != 0)
 +              return -EIO;
 +
 +      /*
 +       * The real client-core makes an effort to ensure
 +       * that actual strings that aren't too long to fit in
 +       * this buffer is what we get here. We're going to use
 +       * string functions on the stuff we got, so we'll make
 +       * this extra effort to try and keep from
 +       * flowing out of this buffer when we use the string
 +       * functions, even if somehow the stuff we end up
 +       * with here is garbage.
 +       */
 +      client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
 +              '\0';
 +      
 +      if (ret != 0) {
 +              pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
 +                      __func__);
 +              return -EIO;
 +      }
 +
 +      pr_info("%s: client debug array string has been received.\n",
 +              __func__);
 +
 +      if (!help_string_initialized) {
 +
 +              /* Free the "we don't know yet" default string... */
 +              kfree(debug_help_string);
 +
 +              /* build a proper debug help string */
 +              if (orangefs_prepare_debugfs_help_string(0)) {
 +                      gossip_err("%s: no debug help string \n",
 +                                 __func__);
 +                      return -EIO;
 +              }
 +
 +              /* Replace the boilerplate boot-time debug-help file. */
 +              debugfs_remove(help_file_dentry);
 +
 +              help_file_dentry =
 +                      debugfs_create_file(
 +                              ORANGEFS_KMOD_DEBUG_HELP_FILE,
 +                              0444,
 +                              debug_dir,
 +                              debug_help_string,
 +                              &debug_help_fops);
 +
 +              if (!help_file_dentry) {
 +                      gossip_err("%s: debugfs_create_file failed for"
 +                                 " :%s:!\n",
 +                                 __func__,
 +                                 ORANGEFS_KMOD_DEBUG_HELP_FILE);
 +                      return -EIO;
 +              }
 +      }
 +
 +      debug_mask_to_string(&client_debug_mask, 1);
 +
 +      debugfs_remove(client_debug_dentry);
 +
 +      orangefs_client_debug_init();
 +
 +      help_string_initialized++;
 +
 +      return ret;
 +}
 +
 +int orangefs_debugfs_new_debug(void __user *arg) 
 +{
 +      struct dev_mask_info_s mask_info = {0};
 +      int ret;
 +
 +      ret = copy_from_user(&mask_info,
 +                           (void __user *)arg,
 +                           sizeof(mask_info));
 +
 +      if (ret != 0)
 +              return -EIO;
 +
 +      if (mask_info.mask_type == KERNEL_MASK) {
 +              if ((mask_info.mask_value == 0)
 +                  && (kernel_mask_set_mod_init)) {
 +                      /*
 +                       * the kernel debug mask was set when the
 +                       * kernel module was loaded; don't override
 +                       * it if the client-core was started without
 +                       * a value for ORANGEFS_KMODMASK.
 +                       */
 +                      return 0;
 +              }
 +              debug_mask_to_string(&mask_info.mask_value,
 +                                   mask_info.mask_type);
 +              orangefs_gossip_debug_mask = mask_info.mask_value;
 +              pr_info("%s: kernel debug mask has been modified to "
 +                      ":%s: :%llx:\n",
 +                      __func__,
 +                      kernel_debug_string,
 +                      (unsigned long long)orangefs_gossip_debug_mask);
 +      } else if (mask_info.mask_type == CLIENT_MASK) {
 +              debug_mask_to_string(&mask_info.mask_value,
 +                                   mask_info.mask_type);
 +              pr_info("%s: client debug mask has been modified to"
 +                      ":%s: :%llx:\n",
 +                      __func__,
 +                      client_debug_string,
 +                      llu(mask_info.mask_value));
 +      } else {
 +              gossip_lerr("Invalid mask type....\n");
 +              return -EINVAL;
 +      }
 +
 +      return ret;
 +}
diff --combined fs/proc/base.c
@@@ -400,23 -400,6 +400,6 @@@ static const struct file_operations pro
        .llseek = generic_file_llseek,
  };
  
- static int proc_pid_auxv(struct seq_file *m, struct pid_namespace *ns,
-                        struct pid *pid, struct task_struct *task)
- {
-       struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
-       if (mm && !IS_ERR(mm)) {
-               unsigned int nwords = 0;
-               do {
-                       nwords += 2;
-               } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
-               seq_write(m, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0]));
-               mmput(mm);
-               return 0;
-       } else
-               return PTR_ERR(mm);
- }
  #ifdef CONFIG_KALLSYMS
  /*
   * Provides a wchan file via kallsyms in a proper one-value-per-file format.
@@@ -483,7 -466,7 +466,7 @@@ static int proc_pid_stack(struct seq_fi
                save_stack_trace_tsk(task, &trace);
  
                for (i = 0; i < trace.nr_entries; i++) {
 -                      seq_printf(m, "[<%pK>] %pS\n",
 +                      seq_printf(m, "[<%pK>] %pB\n",
                                   (void *)entries[i], (void *)entries[i]);
                }
                unlock_trace(task);
@@@ -709,7 -692,7 +692,7 @@@ int proc_setattr(struct dentry *dentry
        if (attr->ia_valid & ATTR_MODE)
                return -EPERM;
  
-       error = inode_change_ok(inode, attr);
+       error = setattr_prepare(dentry, attr);
        if (error)
                return error;
  
@@@ -1014,6 -997,30 +997,30 @@@ static const struct file_operations pro
        .release        = mem_release,
  };
  
+ /*
+  * ->open handler for the per-task "auxv" proc file.  Delegates to
+  * __mem_open(), requesting PTRACE_MODE_READ_FSCREDS access, and hands its
+  * return value straight back to the caller.
+  */
+ static int auxv_open(struct inode *inode, struct file *file)
+ {
+       const unsigned int access_mode = PTRACE_MODE_READ_FSCREDS;
+
+       return __mem_open(inode, file, access_mode);
+ }
+ /*
+  * ->read handler for the per-task "auxv" proc file.
+  *
+  * Copies the saved auxiliary vector of the mm stored in file->private_data
+  * to userspace, honouring @count and *@ppos via simple_read_from_buffer().
+  * Returns the number of bytes copied, 0 at end of data or when the file
+  * has no mm attached, or a negative errno from simple_read_from_buffer().
+  */
+ static ssize_t auxv_read(struct file *file, char __user *buf,
+                       size_t count, loff_t *ppos)
+ {
+       struct mm_struct *mm = file->private_data;
+       unsigned int nwords = 0;
+
+       /*
+        * The open can succeed without an mm (a task that has none), which
+        * leaves private_data NULL.  Report an empty file instead of
+        * dereferencing it.
+        */
+       if (!mm)
+               return 0;
+
+       /* Entries come in pairs; count words up to and including AT_NULL. */
+       do {
+               nwords += 2;
+       } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
+       return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv,
+                                      nwords * sizeof(mm->saved_auxv[0]));
+ }
+ static const struct file_operations proc_auxv_operations = { /* file ops behind the "auxv" pid entries */
+       .open           = auxv_open,
+       .read           = auxv_read,
+       .llseek         = generic_file_llseek,
+       .release        = mem_release,
+ };
  static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
                            loff_t *ppos)
  {
@@@ -2280,27 -2287,16 +2287,27 @@@ static ssize_t timerslack_ns_write(stru
        if (!p)
                return -ESRCH;
  
 -      if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
 -              task_lock(p);
 -              if (slack_ns == 0)
 -                      p->timer_slack_ns = p->default_timer_slack_ns;
 -              else
 -                      p->timer_slack_ns = slack_ns;
 -              task_unlock(p);
 -      } else
 -              count = -EPERM;
 +      if (p != current) {
 +              if (!capable(CAP_SYS_NICE)) {
 +                      count = -EPERM;
 +                      goto out;
 +              }
 +
 +              err = security_task_setscheduler(p);
 +              if (err) {
 +                      count = err;
 +                      goto out;
 +              }
 +      }
 +
 +      task_lock(p);
 +      if (slack_ns == 0)
 +              p->timer_slack_ns = p->default_timer_slack_ns;
 +      else
 +              p->timer_slack_ns = slack_ns;
 +      task_unlock(p);
  
 +out:
        put_task_struct(p);
  
        return count;
@@@ -2310,28 -2306,19 +2317,28 @@@ static int timerslack_ns_show(struct se
  {
        struct inode *inode = m->private;
        struct task_struct *p;
 -      int err =  0;
 +      int err = 0;
  
        p = get_proc_task(inode);
        if (!p)
                return -ESRCH;
  
 -      if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
 -              task_lock(p);
 -              seq_printf(m, "%llu\n", p->timer_slack_ns);
 -              task_unlock(p);
 -      } else
 -              err = -EPERM;
 +      if (p != current) {
 +
 +              if (!capable(CAP_SYS_NICE)) {
 +                      err = -EPERM;
 +                      goto out;
 +              }
 +              err = security_task_getscheduler(p);
 +              if (err)
 +                      goto out;
 +      }
  
 +      task_lock(p);
 +      seq_printf(m, "%llu\n", p->timer_slack_ns);
 +      task_unlock(p);
 +
 +out:
        put_task_struct(p);
  
        return err;
@@@ -2842,7 -2829,7 +2849,7 @@@ static const struct pid_entry tgid_base
        DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
  #endif
        REG("environ",    S_IRUSR, proc_environ_operations),
-       ONE("auxv",       S_IRUSR, proc_pid_auxv),
+       REG("auxv",       S_IRUSR, proc_auxv_operations),
        ONE("status",     S_IRUGO, proc_pid_status),
        ONE("personality", S_IRUSR, proc_pid_personality),
        ONE("limits",     S_IRUGO, proc_pid_limits),
@@@ -3230,7 -3217,7 +3237,7 @@@ static const struct pid_entry tid_base_
        DIR("net",        S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations),
  #endif
        REG("environ",   S_IRUSR, proc_environ_operations),
-       ONE("auxv",      S_IRUSR, proc_pid_auxv),
+       REG("auxv",      S_IRUSR, proc_auxv_operations),
        ONE("status",    S_IRUGO, proc_pid_status),
        ONE("personality", S_IRUSR, proc_pid_personality),
        ONE("limits",    S_IRUGO, proc_pid_limits),
diff --combined fs/proc/generic.c
@@@ -105,7 -105,7 +105,7 @@@ static int proc_notify_change(struct de
        struct proc_dir_entry *de = PDE(inode);
        int error;
  
-       error = inode_change_ok(inode, iattr);
+       error = setattr_prepare(dentry, iattr);
        if (error)
                return error;
  
@@@ -390,8 -390,6 +390,8 @@@ static struct proc_dir_entry *__proc_cr
        atomic_set(&ent->count, 1);
        spin_lock_init(&ent->pde_unload_lock);
        INIT_LIST_HEAD(&ent->pde_openers);
 +      proc_set_user(ent, (*parent)->uid, (*parent)->gid);
 +
  out:
        return ent;
  }
diff --combined fs/proc/proc_sysctl.c
@@@ -72,7 -72,7 +72,7 @@@ static DEFINE_SPINLOCK(sysctl_lock)
  
  static void drop_sysctl_table(struct ctl_table_header *header);
  static int sysctl_follow_link(struct ctl_table_header **phead,
 -      struct ctl_table **pentry, struct nsproxy *namespaces);
 +      struct ctl_table **pentry);
  static int insert_links(struct ctl_table_header *head);
  static void put_links(struct ctl_table_header *header);
  
@@@ -319,11 -319,11 +319,11 @@@ static void sysctl_head_finish(struct c
  }
  
  static struct ctl_table_set *
 -lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
 +lookup_header_set(struct ctl_table_root *root)
  {
        struct ctl_table_set *set = &root->default_set;
        if (root->lookup)
 -              set = root->lookup(root, namespaces);
 +              set = root->lookup(root);
        return set;
  }
  
@@@ -430,7 -430,6 +430,7 @@@ static int sysctl_perm(struct ctl_table
  static struct inode *proc_sys_make_inode(struct super_block *sb,
                struct ctl_table_header *head, struct ctl_table *table)
  {
 +      struct ctl_table_root *root = head->root;
        struct inode *inode;
        struct proc_inode *ei;
  
                if (is_empty_dir(head))
                        make_empty_dir_inode(inode);
        }
 +
 +      if (root->set_ownership)
 +              root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
 +
  out:
        return inode;
  }
@@@ -496,7 -491,7 +496,7 @@@ static struct dentry *proc_sys_lookup(s
                goto out;
  
        if (S_ISLNK(p->mode)) {
 -              ret = sysctl_follow_link(&h, &p, current->nsproxy);
 +              ret = sysctl_follow_link(&h, &p);
                err = ERR_PTR(ret);
                if (ret)
                        goto out;
@@@ -664,7 -659,7 +664,7 @@@ static bool proc_sys_link_fill_cache(st
  
        if (S_ISLNK(table->mode)) {
                /* It is not an error if we can not follow the link ignore it */
 -              int err = sysctl_follow_link(&head, &table, current->nsproxy);
 +              int err = sysctl_follow_link(&head, &table);
                if (err)
                        goto out;
        }
@@@ -759,7 -754,7 +759,7 @@@ static int proc_sys_setattr(struct dent
        if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
                return -EPERM;
  
-       error = inode_change_ok(inode, attr);
+       error = setattr_prepare(dentry, attr);
        if (error)
                return error;
  
@@@ -981,7 -976,7 +981,7 @@@ static struct ctl_dir *xlate_dir(struc
  }
  
  static int sysctl_follow_link(struct ctl_table_header **phead,
 -      struct ctl_table **pentry, struct nsproxy *namespaces)
 +      struct ctl_table **pentry)
  {
        struct ctl_table_header *head;
        struct ctl_table_root *root;
        ret = 0;
        spin_lock(&sysctl_lock);
        root = (*pentry)->data;
 -      set = lookup_header_set(root, namespaces);
 +      set = lookup_header_set(root);
        dir = xlate_dir(set, (*phead)->parent);
        if (IS_ERR(dir))
                ret = PTR_ERR(dir);
diff --combined fs/xfs/xfs_file.c
@@@ -269,8 -269,6 +269,8 @@@ xfs_file_dio_aio_read
                return -EINVAL;
        }
  
 +      file_accessed(iocb->ki_filp);
 +
        /*
         * Locking is a bit tricky here. If we take an exclusive lock for direct
         * IO, we effectively serialise all new concurrent read IO to this file
        }
        xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
  
 -      file_accessed(iocb->ki_filp);
        return ret;
  }
  
@@@ -333,7 -332,10 +333,7 @@@ xfs_file_dax_read
        struct kiocb            *iocb,
        struct iov_iter         *to)
  {
 -      struct address_space    *mapping = iocb->ki_filp->f_mapping;
 -      struct inode            *inode = mapping->host;
 -      struct xfs_inode        *ip = XFS_I(inode);
 -      struct iov_iter         data = *to;
 +      struct xfs_inode        *ip = XFS_I(iocb->ki_filp->f_mapping->host);
        size_t                  count = iov_iter_count(to);
        ssize_t                 ret = 0;
  
                return 0; /* skip atime */
  
        xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 -      ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
 -      if (ret > 0) {
 -              iocb->ki_pos += ret;
 -              iov_iter_advance(to, ret);
 -      }
 +      ret = iomap_dax_rw(iocb, to, &xfs_iomap_ops);
        xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
  
        file_accessed(iocb->ki_filp);
@@@ -393,6 -399,45 +393,6 @@@ xfs_file_read_iter
        return ret;
  }
  
 -STATIC ssize_t
 -xfs_file_splice_read(
 -      struct file             *infilp,
 -      loff_t                  *ppos,
 -      struct pipe_inode_info  *pipe,
 -      size_t                  count,
 -      unsigned int            flags)
 -{
 -      struct xfs_inode        *ip = XFS_I(infilp->f_mapping->host);
 -      ssize_t                 ret;
 -
 -      XFS_STATS_INC(ip->i_mount, xs_read_calls);
 -
 -      if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 -              return -EIO;
 -
 -      trace_xfs_file_splice_read(ip, count, *ppos);
 -
 -      /*
 -       * DAX inodes cannot ues the page cache for splice, so we have to push
 -       * them through the VFS IO path. This means it goes through
 -       * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
 -       * cannot lock the splice operation at this level for DAX inodes.
 -       */
 -      if (IS_DAX(VFS_I(ip))) {
 -              ret = default_file_splice_read(infilp, ppos, pipe, count,
 -                                             flags);
 -              goto out;
 -      }
 -
 -      xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
 -      ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
 -      xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 -out:
 -      if (ret > 0)
 -              XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
 -      return ret;
 -}
 -
  /*
   * Zero any on disk space between the current EOF and the new, larger EOF.
   *
@@@ -666,32 -711,70 +666,32 @@@ xfs_file_dax_write
        struct kiocb            *iocb,
        struct iov_iter         *from)
  {
 -      struct address_space    *mapping = iocb->ki_filp->f_mapping;
 -      struct inode            *inode = mapping->host;
 +      struct inode            *inode = iocb->ki_filp->f_mapping->host;
        struct xfs_inode        *ip = XFS_I(inode);
 -      struct xfs_mount        *mp = ip->i_mount;
 -      ssize_t                 ret = 0;
 -      int                     unaligned_io = 0;
 -      int                     iolock;
 -      struct iov_iter         data;
 +      int                     iolock = XFS_IOLOCK_EXCL;
 +      ssize_t                 ret, error = 0;
 +      size_t                  count;
 +      loff_t                  pos;
  
 -      /* "unaligned" here means not aligned to a filesystem block */
 -      if ((iocb->ki_pos & mp->m_blockmask) ||
 -          ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
 -              unaligned_io = 1;
 -              iolock = XFS_IOLOCK_EXCL;
 -      } else if (mapping->nrpages) {
 -              iolock = XFS_IOLOCK_EXCL;
 -      } else {
 -              iolock = XFS_IOLOCK_SHARED;
 -      }
        xfs_rw_ilock(ip, iolock);
 -
        ret = xfs_file_aio_write_checks(iocb, from, &iolock);
        if (ret)
                goto out;
  
 -      /*
 -       * Yes, even DAX files can have page cache attached to them:  A zeroed
 -       * page is inserted into the pagecache when we have to serve a write
 -       * fault on a hole.  It should never be dirtied and can simply be
 -       * dropped from the pagecache once we get real data for the page.
 -       *
 -       * XXX: This is racy against mmap, and there's nothing we can do about
 -       * it. dax_do_io() should really do this invalidation internally as
 -       * it will know if we've allocated over a holei for this specific IO and
 -       * if so it needs to update the mapping tree and invalidate existing
 -       * PTEs over the newly allocated range. Remove this invalidation when
 -       * dax_do_io() is fixed up.
 -       */
 -      if (mapping->nrpages) {
 -              loff_t end = iocb->ki_pos + iov_iter_count(from) - 1;
 +      pos = iocb->ki_pos;
 +      count = iov_iter_count(from);
  
 -              ret = invalidate_inode_pages2_range(mapping,
 -                                                  iocb->ki_pos >> PAGE_SHIFT,
 -                                                  end >> PAGE_SHIFT);
 -              WARN_ON_ONCE(ret);
 -      }
 +      trace_xfs_file_dax_write(ip, count, pos);
  
 -      if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
 -              xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 -              iolock = XFS_IOLOCK_SHARED;
 +      ret = iomap_dax_rw(iocb, from, &xfs_iomap_ops);
 +      if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
 +              i_size_write(inode, iocb->ki_pos);
 +              error = xfs_setfilesize(ip, pos, ret);
        }
  
 -      trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
 -
 -      data = *from;
 -      ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
 -                      xfs_end_io_direct_write, 0);
 -      if (ret > 0) {
 -              iocb->ki_pos += ret;
 -              iov_iter_advance(from, ret);
 -      }
  out:
        xfs_rw_iunlock(ip, iolock);
 -      return ret;
 +      return error ? error : ret;
  }
  
  STATIC ssize_t
@@@ -901,7 -984,7 +901,7 @@@ xfs_file_fallocate
  
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = new_size;
-               error = xfs_setattr_size(ip, &iattr);
+               error = xfs_vn_setattr_size(file_dentry(file), &iattr);
                if (error)
                        goto out_unlock;
        }
@@@ -1430,7 -1513,7 +1430,7 @@@ xfs_filemap_page_mkwrite
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
  
        if (IS_DAX(inode)) {
 -              ret = dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 +              ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
        } else {
                ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
                ret = block_page_mkwrite_return(ret);
@@@ -1464,7 -1547,7 +1464,7 @@@ xfs_filemap_fault
                 * changes to xfs_get_blocks_direct() to map unwritten extent
                 * ioend for conversion on read-only mappings.
                 */
 -              ret = dax_fault(vma, vmf, xfs_get_blocks_dax_fault);
 +              ret = iomap_dax_fault(vma, vmf, &xfs_iomap_ops);
        } else
                ret = filemap_fault(vma, vmf);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@@ -1569,7 -1652,7 +1569,7 @@@ const struct file_operations xfs_file_o
        .llseek         = xfs_file_llseek,
        .read_iter      = xfs_file_read_iter,
        .write_iter     = xfs_file_write_iter,
 -      .splice_read    = xfs_file_splice_read,
 +      .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .unlocked_ioctl = xfs_file_ioctl,
  #ifdef CONFIG_COMPAT
        .open           = xfs_file_open,
        .release        = xfs_file_release,
        .fsync          = xfs_file_fsync,
 +      .get_unmapped_area = thp_get_unmapped_area,
        .fallocate      = xfs_file_fallocate,
  };
  
diff --combined include/linux/fs.h
@@@ -63,7 -63,7 +63,7 @@@ extern void __init files_maxfiles_init(
  
  extern struct files_stat_struct files_stat;
  extern unsigned long get_max_files(void);
- extern int sysctl_nr_open;
+ extern unsigned int sysctl_nr_open;
  extern struct inodes_stat_t inodes_stat;
  extern int leases_enable, lease_break_time;
  extern int sysctl_protected_symlinks;
@@@ -224,6 -224,7 +224,7 @@@ typedef int (dio_iodone_t)(struct kioc
  #define ATTR_KILL_PRIV        (1 << 14)
  #define ATTR_OPEN     (1 << 15) /* Truncating from open(O_TRUNC) */
  #define ATTR_TIMES_SET        (1 << 16)
+ #define ATTR_TOUCH    (1 << 17)
  
  /*
   * Whiteout is represented by a char device.  The following constants define the
@@@ -1064,6 -1065,18 +1065,18 @@@ struct file_lock_context 
  
  extern void send_sigio(struct fown_struct *fown, int fd, int band);
  
+ /*
+  * Return the inode to use for locking
+  *
+  * For overlayfs this should be the overlay inode, not the real inode returned
+  * by file_inode().  For any other fs file_inode(filp) and locks_inode(filp) are
+  * equal.
+  */
+ static inline struct inode *locks_inode(const struct file *f)
+ {
+       /* Resolve via the file's path dentry rather than file_inode(). */
+       const struct dentry *dentry = f->f_path.dentry;
+
+       return dentry->d_inode;
+ }
  #ifdef CONFIG_FILE_LOCKING
  extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
  extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
@@@ -1251,7 -1264,7 +1264,7 @@@ static inline struct dentry *file_dentr
  
  static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
  {
-       return locks_lock_inode_wait(file_inode(filp), fl);
+       return locks_lock_inode_wait(locks_inode(filp), fl);
  }
  
  struct fasync_struct {
@@@ -2006,7 -2019,6 +2019,6 @@@ enum file_time_flags 
        S_VERSION = 8,
  };
  
- extern bool atime_needs_update(const struct path *, struct inode *);
  extern void touch_atime(const struct path *);
  static inline void file_accessed(struct file *file)
  {
@@@ -2155,7 -2167,7 +2167,7 @@@ static inline int mandatory_lock(struc
  
  static inline int locks_verify_locked(struct file *file)
  {
-       if (mandatory_lock(file_inode(file)))
+       if (mandatory_lock(locks_inode(file)))
                return locks_mandatory_locked(file);
        return 0;
  }
@@@ -2794,6 -2806,8 +2806,6 @@@ extern void block_sync_page(struct pag
  /* fs/splice.c */
  extern ssize_t generic_file_splice_read(struct file *, loff_t *,
                struct pipe_inode_info *, size_t, unsigned int);
 -extern ssize_t default_file_splice_read(struct file *, loff_t *,
 -              struct pipe_inode_info *, size_t, unsigned int);
  extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
                struct file *, loff_t *, size_t, unsigned int);
  extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
@@@ -2993,7 -3007,7 +3005,7 @@@ extern int buffer_migrate_page(struct a
  #define buffer_migrate_page NULL
  #endif
  
- extern int inode_change_ok(const struct inode *, struct iattr *);
+ extern int setattr_prepare(struct dentry *, struct iattr *);
  extern int inode_newsize_ok(const struct inode *, loff_t offset);
  extern void setattr_copy(struct inode *inode, const struct iattr *attr);
  
diff --combined include/linux/pagemap.h
@@@ -25,8 -25,6 +25,8 @@@ enum mapping_flags 
        AS_MM_ALL_LOCKS = __GFP_BITS_SHIFT + 2, /* under mm_take_all_locks() */
        AS_UNEVICTABLE  = __GFP_BITS_SHIFT + 3, /* e.g., ramdisk, SHM_LOCK */
        AS_EXITING      = __GFP_BITS_SHIFT + 4, /* final truncate in progress */
 +      /* writeback related tags are not used */
 +      AS_NO_WRITEBACK_TAGS = __GFP_BITS_SHIFT + 5,
  };
  
  static inline void mapping_set_error(struct address_space *mapping, int error)
@@@ -66,16 -64,6 +66,16 @@@ static inline int mapping_exiting(struc
        return test_bit(AS_EXITING, &mapping->flags);
  }
  
 +static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
 +{     /* Mark @mapping as not using writeback tags by setting AS_NO_WRITEBACK_TAGS. */
 +      set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
 +}
 +
 +static inline int mapping_use_writeback_tags(struct address_space *mapping)
 +{
 +      return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
 +}
 +
  static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
  {
        return (__force gfp_t)mapping->flags & __GFP_BITS_MASK;
@@@ -408,7 -396,7 +408,7 @@@ static inline loff_t page_offset(struc
  
  static inline loff_t page_file_offset(struct page *page)
  {
 -      return ((loff_t)page_file_index(page)) << PAGE_SHIFT;
 +      return ((loff_t)page_index(page)) << PAGE_SHIFT;
  }
  
  extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
@@@ -530,58 -518,9 +530,9 @@@ void page_endio(struct page *page, boo
  extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter);
  
  /*
-  * Fault one or two userspace pages into pagetables.
-  * Return -EINVAL if more than two pages would be needed.
-  * Return non-zero on a fault.
+  * Fault everything in given userspace address range in.
   */
  static inline int fault_in_pages_writeable(char __user *uaddr, int size)
- {
-       int span, ret;
-       if (unlikely(size == 0))
-               return 0;
-       span = offset_in_page(uaddr) + size;
-       if (span > 2 * PAGE_SIZE)
-               return -EINVAL;
-       /*
-        * Writing zeroes into userspace here is OK, because we know that if
-        * the zero gets there, we'll be overwriting it.
-        */
-       ret = __put_user(0, uaddr);
-       if (ret == 0 && span > PAGE_SIZE)
-               ret = __put_user(0, uaddr + size - 1);
-       return ret;
- }
- static inline int fault_in_pages_readable(const char __user *uaddr, int size)
- {
-       volatile char c;
-       int ret;
-       if (unlikely(size == 0))
-               return 0;
-       ret = __get_user(c, uaddr);
-       if (ret == 0) {
-               const char __user *end = uaddr + size - 1;
-               if (((unsigned long)uaddr & PAGE_MASK) !=
-                               ((unsigned long)end & PAGE_MASK)) {
-                       ret = __get_user(c, end);
-                       (void)c;
-               }
-       }
-       return ret;
- }
- /*
-  * Multipage variants of the above prefault helpers, useful if more than
-  * PAGE_SIZE of data needs to be prefaulted. These are separate from the above
-  * functions (which only handle up to PAGE_SIZE) to avoid clobbering the
-  * filemap.c hotpaths.
-  */
- static inline int fault_in_multipages_writeable(char __user *uaddr, int size)
  {
        char __user *end = uaddr + size - 1;
  
        return 0;
  }
  
- static inline int fault_in_multipages_readable(const char __user *uaddr,
-                                              int size)
+ static inline int fault_in_pages_readable(const char __user *uaddr, int size)
  {
        volatile char c;
        const char __user *end = uaddr + size - 1;
diff --combined include/linux/uio.h
@@@ -13,7 -13,6 +13,7 @@@
  #include <uapi/linux/uio.h>
  
  struct page;
 +struct pipe_inode_info;
  
  struct kvec {
        void *iov_base; /* and that should *never* hold a userland pointer */
@@@ -24,7 -23,6 +24,7 @@@ enum 
        ITER_IOVEC = 0,
        ITER_KVEC = 2,
        ITER_BVEC = 4,
 +      ITER_PIPE = 8,
  };
  
  struct iov_iter {
                const struct iovec *iov;
                const struct kvec *kvec;
                const struct bio_vec *bvec;
 +              struct pipe_inode_info *pipe;
 +      };
 +      union {
 +              unsigned long nr_segs;
 +              int idx;
        };
 -      unsigned long nr_segs;
  };
  
  /*
@@@ -70,7 -64,7 +70,7 @@@ static inline struct iovec iov_iter_iov
  }
  
  #define iov_for_each(iov, iter, start)                                \
 -      if (!((start).type & ITER_BVEC))                        \
 +      if (!((start).type & (ITER_BVEC | ITER_PIPE)))          \
        for (iter = (start);                                    \
             (iter).count &&                                    \
             ((iov = iov_iter_iovec(&(iter))), 1);              \
@@@ -82,7 -76,6 +82,6 @@@ size_t iov_iter_copy_from_user_atomic(s
                struct iov_iter *i, unsigned long offset, size_t bytes);
  void iov_iter_advance(struct iov_iter *i, size_t bytes);
  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
- #define iov_iter_fault_in_multipages_readable iov_iter_fault_in_readable
  size_t iov_iter_single_seg_count(const struct iov_iter *i);
  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i);
@@@ -100,8 -93,6 +99,8 @@@ void iov_iter_kvec(struct iov_iter *i, 
                        unsigned long nr_segs, size_t count);
  void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
                        unsigned long nr_segs, size_t count);
 +void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
 +                      size_t count);
  ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
                        size_t maxsize, unsigned maxpages, size_t *start);
  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
@@@ -117,7 -108,7 +116,7 @@@ static inline size_t iov_iter_count(str
  
  static inline bool iter_is_iovec(struct iov_iter *i)
  {
 -      return !(i->type & (ITER_BVEC | ITER_KVEC));
 +      return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
  }
  
  /*
@@@ -71,7 -71,6 +71,7 @@@ header-y += binfmts.
  header-y += blkpg.h
  header-y += blktrace_api.h
  header-y += bpf_common.h
 +header-y += bpf_perf_event.h
  header-y += bpf.h
  header-y += bpqether.h
  header-y += bsg.h
@@@ -336,6 -335,8 +336,8 @@@ header-y += pkt_cls.
  header-y += pkt_sched.h
  header-y += pmu.h
  header-y += poll.h
+ header-y += posix_acl.h
+ header-y += posix_acl_xattr.h
  header-y += posix_types.h
  header-y += ppdev.h
  header-y += ppp-comp.h
diff --combined kernel/sysctl.c
@@@ -65,7 -65,6 +65,7 @@@
  #include <linux/sched/sysctl.h>
  #include <linux/kexec.h>
  #include <linux/bpf.h>
 +#include <linux/mount.h>
  
  #include <asm/uaccess.h>
  #include <asm/processor.h>
@@@ -107,9 -106,8 +107,8 @@@ extern unsigned int core_pipe_limit
  extern int pid_max;
  extern int pid_max_min, pid_max_max;
  extern int percpu_pagelist_fraction;
- extern int compat_log;
  extern int latencytop_enabled;
- extern int sysctl_nr_open_min, sysctl_nr_open_max;
+ extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
  #ifndef CONFIG_MMU
  extern int sysctl_nr_trim_pages;
  #endif
@@@ -1085,15 -1083,6 +1084,6 @@@ static struct ctl_table kern_table[] = 
                .extra1         = &neg_one,
        },
  #endif
- #ifdef CONFIG_COMPAT
-       {
-               .procname       = "compat-log",
-               .data           = &compat_log,
-               .maxlen         = sizeof (int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
- #endif
  #ifdef CONFIG_RT_MUTEXES
        {
                .procname       = "max_lock_depth",
@@@ -1693,7 -1682,7 +1683,7 @@@ static struct ctl_table fs_table[] = 
        {
                .procname       = "nr_open",
                .data           = &sysctl_nr_open,
-               .maxlen         = sizeof(int),
+               .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &sysctl_nr_open_min,
                .mode           = 0644,
                .proc_handler   = proc_doulongvec_minmax,
        },
 +      {
 +              .procname       = "mount-max",
 +              .data           = &sysctl_mount_max,
 +              .maxlen         = sizeof(unsigned int),
 +              .mode           = 0644,
 +              .proc_handler   = proc_dointvec_minmax,
 +              .extra1         = &one,
 +      },
        { }
  };
  
diff --combined lib/iov_iter.c
@@@ -3,11 -3,8 +3,11 @@@
  #include <linux/pagemap.h>
  #include <linux/slab.h>
  #include <linux/vmalloc.h>
 +#include <linux/splice.h>
  #include <net/checksum.h>
  
 +#define PIPE_PARANOIA /* for now */
 +
  #define iterate_iovec(i, n, __v, __p, skip, STEP) {   \
        size_t left;                                    \
        size_t wanted = n;                              \
@@@ -293,93 -290,6 +293,93 @@@ done
        return wanted - bytes;
  }
  
 +#ifdef PIPE_PARANOIA
 +static bool sanity(const struct iov_iter *i)
 +{     /*
 +       * Debug check, built only under PIPE_PARANOIA: verify that the
 +       * iterator's idx/iov_offset agree with the state of the pipe's
 +       * buffer ring.  Returns true when they do; otherwise prints the
 +       * iterator position and every buffer slot, fires WARN_ON(1) and
 +       * returns false.
 +       */
 +      struct pipe_inode_info *pipe = i->pipe;
 +      int idx = i->idx;
 +      int next = pipe->curbuf + pipe->nrbufs; /* unmasked index just past the last occupied slot */
 +      if (i->iov_offset) {
 +              struct pipe_buffer *p;
 +              if (unlikely(!pipe->nrbufs))
 +                      goto Bad;       // pipe must be non-empty
 +              if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
 +                      goto Bad;       // must be at the last buffer...
 +
 +              p = &pipe->bufs[idx];
 +              if (unlikely(p->offset + p->len != i->iov_offset))
 +                      goto Bad;       // ... at the end of segment
 +      } else {
 +              if (idx != (next & (pipe->buffers - 1)))
 +                      goto Bad;       // must be right after the last buffer
 +      }
 +      return true;
 +Bad:
 +      printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
 +      printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
 +                      pipe->curbuf, pipe->nrbufs, pipe->buffers);
 +      for (idx = 0; idx < pipe->buffers; idx++) /* idx is reused as the dump cursor */
 +              printk(KERN_ERR "[%p %p %d %d]\n",
 +                      pipe->bufs[idx].ops,
 +                      pipe->bufs[idx].page,
 +                      pipe->bufs[idx].offset,
 +                      pipe->bufs[idx].len);
 +      WARN_ON(1);
 +      return false;
 +}
 +#else
 +#define sanity(i) true
 +#endif
 +
 +static inline int next_idx(int idx, struct pipe_inode_info *pipe)
 +{
 +      return (idx + 1) & (pipe->buffers - 1);
 +}
 +
 +static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
 +                       struct iov_iter *i)
 +{     /*
 +       * Feed @bytes of @page starting at @offset into the pipe behind @i
 +       * by reference: either the current last buffer is extended, or a new
 +       * buffer slot is pointed at the page (taking a page reference).
 +       * @bytes is first clamped to i->count.  Returns the number of bytes
 +       * accounted for, or 0 when there is nothing to add, the sanity
 +       * check fails, or no slot is available.
 +       */
 +      struct pipe_inode_info *pipe = i->pipe;
 +      struct pipe_buffer *buf;
 +      size_t off;
 +      int idx;
 +
 +      if (unlikely(bytes > i->count))
 +              bytes = i->count;
 +
 +      if (unlikely(!bytes))
 +              return 0;
 +
 +      if (!sanity(i))
 +              return 0;
 +
 +      off = i->iov_offset;
 +      idx = i->idx;
 +      buf = &pipe->bufs[idx];
 +      if (off) {
 +              if (offset == off && buf->page == page) {
 +                      /* merge with the last one */
 +                      buf->len += bytes;
 +                      i->iov_offset += bytes;
 +                      goto out;
 +              }
 +              idx = next_idx(idx, pipe); /* not contiguous: move on to the following slot */
 +              buf = &pipe->bufs[idx];
 +      }
 +      if (idx == pipe->curbuf && pipe->nrbufs) /* wrapped onto the first in-use slot: ring is full */
 +              return 0;
 +      pipe->nrbufs++;
 +      buf->ops = &page_cache_pipe_buf_ops;
 +      get_page(buf->page = page); /* store the page in the slot and take a reference on it */
 +      buf->offset = offset;
 +      buf->len = bytes;
 +      i->iov_offset = offset + bytes;
 +      i->idx = idx;
 +out:
 +      i->count -= bytes;
 +      return bytes;
 +}
 +
  /*
   * Fault in one or more iovecs of the given iov_iter, to a maximum length of
   * bytes.  For each iovec, fault in each page that constitutes the iovec.
@@@ -396,8 -306,7 +396,7 @@@ int iov_iter_fault_in_readable(struct i
  
        if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
                iterate_iovec(i, bytes, v, iov, skip, ({
-                       err = fault_in_multipages_readable(v.iov_base,
-                                       v.iov_len);
+                       err = fault_in_pages_readable(v.iov_base, v.iov_len);
                        if (unlikely(err))
                        return err;
                0;}))
@@@ -446,98 -355,9 +445,98 @@@ static void memzero_page(struct page *p
        kunmap_atomic(addr);
  }
  
 +static inline bool allocated(struct pipe_buffer *buf) /* true if buf uses default_pipe_buf_ops, the ops push_pipe() installs on pages it allocates */
 +{
 +      return buf->ops == &default_pipe_buf_ops;
 +}
 +
 +static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) /* report the slot (*idxp) and offset (*offp) where the next byte would be stored */
 +{
 +      size_t off = i->iov_offset;
 +      int idx = i->idx;
 +      if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { /* current slot is not an allocated() page, or is filled to PAGE_SIZE */
 +              idx = next_idx(idx, i->pipe); /* so new data starts at offset 0 of the next slot */
 +              off = 0;
 +      }
 +      *idxp = idx;
 +      *offp = off;
 +}
 +
 +static size_t push_pipe(struct iov_iter *i, size_t size, /* reserve up to size bytes of buffer space in the pipe; returns the bytes actually reserved */
 +                      int *idxp, size_t *offp) /* out: slot and offset where the reserved space begins */
 +{
 +      struct pipe_inode_info *pipe = i->pipe;
 +      size_t off;
 +      int idx;
 +      ssize_t left; /* signed: it may go to zero or below when the current page has enough room */
 +
 +      if (unlikely(size > i->count)) /* clamp the request to what the iterator still expects */
 +              size = i->count;
 +      if (unlikely(!size))
 +              return 0;
 +
 +      left = size;
 +      data_start(i, &idx, &off);
 +      *idxp = idx;
 +      *offp = off;
 +      if (off) { /* a partially filled page is available in slot idx */
 +              left -= PAGE_SIZE - off; /* room remaining in that page */
 +              if (left <= 0) { /* the whole request fits in the current page */
 +                      pipe->bufs[idx].len += size;
 +                      return size;
 +              }
 +              pipe->bufs[idx].len = PAGE_SIZE; /* current page is used up to its end */
 +              idx = next_idx(idx, pipe);
 +      }
 +      while (idx != pipe->curbuf || !pipe->nrbufs) { /* stop once idx wraps onto curbuf with buffers in use */
 +              struct page *page = alloc_page(GFP_USER);
 +              if (!page) /* allocation failure: return what was reserved so far */
 +                      break;
 +              pipe->nrbufs++;
 +              pipe->bufs[idx].ops = &default_pipe_buf_ops;
 +              pipe->bufs[idx].page = page;
 +              pipe->bufs[idx].offset = 0;
 +              if (left <= PAGE_SIZE) { /* last page needed: its len is the remainder */
 +                      pipe->bufs[idx].len = left;
 +                      return size;
 +              }
 +              pipe->bufs[idx].len = PAGE_SIZE;
 +              left -= PAGE_SIZE;
 +              idx = next_idx(idx, pipe);
 +      }
 +      return size - left; /* short reservation; i->idx, i->iov_offset and i->count are not modified in this function */
 +}
 +
 +static size_t copy_pipe_to_iter(const void *addr, size_t bytes, /* copy bytes from kernel memory at addr into pipe pages; returns the bytes copied */
 +                              struct iov_iter *i)
 +{
 +      struct pipe_inode_info *pipe = i->pipe;
 +      size_t n, off;
 +      int idx;
 +
 +      if (!sanity(i))
 +              return 0;
 +
 +      bytes = n = push_pipe(i, bytes, &idx, &off); /* reserve space first; may be less than requested */
 +      if (unlikely(!n))
 +              return 0;
 +      for ( ; n; idx = next_idx(idx, pipe), off = 0) { /* only the first page can start at a nonzero offset */
 +              size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 +              memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
 +              i->idx = idx; /* keep the iterator position at the end of the data written so far */
 +              i->iov_offset = off + chunk;
 +              n -= chunk;
 +              addr += chunk; /* arithmetic on a void pointer: relies on the GNU C extension */
 +      }
 +      i->count -= bytes;
 +      return bytes;
 +}
 +
  size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
  {
        const char *from = addr;
 +      if (unlikely(i->type & ITER_PIPE))
 +              return copy_pipe_to_iter(addr, bytes, i);
        iterate_and_advance(i, bytes, v,
                __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
                               v.iov_len),
@@@ -553,10 -373,6 +552,10 @@@ EXPORT_SYMBOL(copy_to_iter)
  size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
  {
        char *to = addr;
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              WARN_ON(1);
 +              return 0;
 +      }
        iterate_and_advance(i, bytes, v,
                __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
                                 v.iov_len),
@@@ -572,10 -388,6 +571,10 @@@ EXPORT_SYMBOL(copy_from_iter)
  size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
  {
        char *to = addr;
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              WARN_ON(1);
 +              return 0;
 +      }
        iterate_and_advance(i, bytes, v,
                __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
                                         v.iov_base, v.iov_len),
@@@ -596,20 -408,14 +595,20 @@@ size_t copy_page_to_iter(struct page *p
                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
 -      } else
 +      } else if (likely(!(i->type & ITER_PIPE)))
                return copy_page_to_iter_iovec(page, offset, bytes, i);
 +      else
 +              return copy_page_to_iter_pipe(page, offset, bytes, i);
  }
  EXPORT_SYMBOL(copy_page_to_iter);
  
  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
  {
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              WARN_ON(1);
 +              return 0;
 +      }
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
  }
  EXPORT_SYMBOL(copy_page_from_iter);
  
 +static size_t pipe_zero(size_t bytes, struct iov_iter *i) /* append up to bytes zero bytes to the pipe; returns the number actually written */
 +{
 +      struct pipe_inode_info *pipe = i->pipe;
 +      size_t n, off;
 +      int idx;
 +
 +      if (!sanity(i))
 +              return 0;
 +
 +      bytes = n = push_pipe(i, bytes, &idx, &off); /* reserve space first; may be less than requested */
 +      if (unlikely(!n))
 +              return 0;
 +
 +      for ( ; n; idx = next_idx(idx, pipe), off = 0) { /* only the first page can start at a nonzero offset */
 +              size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 +              memzero_page(pipe->bufs[idx].page, off, chunk);
 +              i->idx = idx; /* keep the iterator position at the end of the zeroed data */
 +              i->iov_offset = off + chunk;
 +              n -= chunk;
 +      }
 +      i->count -= bytes;
 +      return bytes;
 +}
 +
  size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
  {
 +      if (unlikely(i->type & ITER_PIPE))
 +              return pipe_zero(bytes, i);
        iterate_and_advance(i, bytes, v,
                __clear_user(v.iov_base, v.iov_len),
                memzero_page(v.bv_page, v.bv_offset, v.bv_len),
@@@ -662,11 -442,6 +661,11 @@@ size_t iov_iter_copy_from_user_atomic(s
                struct iov_iter *i, unsigned long offset, size_t bytes)
  {
        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              kunmap_atomic(kaddr);
 +              WARN_ON(1);
 +              return 0;
 +      }
        iterate_all_kinds(i, bytes, v,
                __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
                                          v.iov_base, v.iov_len),
  }
  EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
  
 +static void pipe_advance(struct iov_iter *i, size_t size)
 +{
 +      struct pipe_inode_info *pipe = i->pipe;
 +      struct pipe_buffer *buf;
 +      int idx = i->idx;
 +      size_t off = i->iov_offset;
 +      
 +      if (unlikely(i->count < size))
 +              size = i->count;
 +
 +      if (size) {
 +              if (off) /* make it relative to the beginning of buffer */
 +                      size += off - pipe->bufs[idx].offset;
 +              while (1) {
 +                      buf = &pipe->bufs[idx];
 +                      if (size <= buf->len)
 +                              break;
 +                      size -= buf->len;
 +                      idx = next_idx(idx, pipe);
 +              }
 +              buf->len = size;
 +              i->idx = idx;
 +              off = i->iov_offset = buf->offset + size;
 +      }
 +      if (off)
 +              idx = next_idx(idx, pipe);
 +      if (pipe->nrbufs) {
 +              int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
 +              /* [curbuf,unused) is in use.  Free [idx,unused) */
 +              while (idx != unused) {
 +                      pipe_buf_release(pipe, &pipe->bufs[idx]);
 +                      idx = next_idx(idx, pipe);
 +                      pipe->nrbufs--;
 +              }
 +      }
 +}
 +
  void iov_iter_advance(struct iov_iter *i, size_t size) /* advance the iterator by size bytes */
  {
 +      if (unlikely(i->type & ITER_PIPE)) { /* pipe-backed iterators have their own advance routine */
 +              pipe_advance(i, size);
 +              return;
 +      }
        iterate_and_advance(i, size, v, 0, 0, 0) /* other flavours: step expressions are all 0, so presumably only the position moves - confirm against the macro */
  }
  EXPORT_SYMBOL(iov_iter_advance);
   */
  size_t iov_iter_single_seg_count(const struct iov_iter *i)
  {
 +      if (unlikely(i->type & ITER_PIPE))
 +              return i->count;        // it is a silly place, anyway
        if (i->nr_segs == 1)
                return i->count;
        else if (i->type & ITER_BVEC)
@@@ -768,19 -500,6 +767,19 @@@ void iov_iter_bvec(struct iov_iter *i, 
  }
  EXPORT_SYMBOL(iov_iter_bvec);
  
 +void iov_iter_pipe(struct iov_iter *i, int direction, /* initialise i as a pipe-backed iterator with room for count bytes */
 +                      struct pipe_inode_info *pipe,
 +                      size_t count)
 +{
 +      BUG_ON(direction != ITER_PIPE); /* the only accepted direction value is ITER_PIPE itself */
 +      i->type = direction;
 +      i->pipe = pipe;
 +      i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); /* first slot after the nrbufs slots counted from curbuf */
 +      i->iov_offset = 0; /* nothing written through this iterator yet */
 +      i->count = count;
 +}
 +EXPORT_SYMBOL(iov_iter_pipe);
 +
  unsigned long iov_iter_alignment(const struct iov_iter *i)
  {
        unsigned long res = 0;
        if (!size)
                return 0;
  
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
 +                      return size | i->iov_offset;
 +              return size;
 +      }
        iterate_all_kinds(i, size, v,
                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
                res |= v.bv_offset | v.bv_len,
@@@ -810,11 -524,6 +809,11 @@@ unsigned long iov_iter_gap_alignment(co
        if (!size)
                return 0;
  
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              WARN_ON(1);
 +              return ~0U;
 +      }
 +
        iterate_all_kinds(i, size, v,
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0), 0),
  }
  EXPORT_SYMBOL(iov_iter_gap_alignment);
  
 +static inline size_t __pipe_get_pages(struct iov_iter *i,
 +                              size_t maxsize,
 +                              struct page **pages,
 +                              int idx,
 +                              size_t *start)
 +{
 +      struct pipe_inode_info *pipe = i->pipe;
 +      size_t n = push_pipe(i, maxsize, &idx, start);
 +      if (!n)
 +              return -EFAULT;
 +
 +      maxsize = n;
 +      n += *start;
 +      while (n >= PAGE_SIZE) {
 +              get_page(*pages++ = pipe->bufs[idx].page);
 +              idx = next_idx(idx, pipe);
 +              n -= PAGE_SIZE;
 +      }
 +
 +      return maxsize;
 +}
 +
 +static ssize_t pipe_get_pages(struct iov_iter *i, /* pipe flavour of iov_iter_get_pages(): fill the caller-supplied pages array */
 +                 struct page **pages, size_t maxsize, unsigned maxpages,
 +                 size_t *start)
 +{
 +      unsigned npages;
 +      size_t capacity;
 +      int idx;
 +
 +      if (!sanity(i))
 +              return -EFAULT;
 +
 +      data_start(i, &idx, start); /* where new data would begin: slot idx, offset *start */
 +      /* some of this one + all after this one */
 +      npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; /* slots from idx up to, not including, curbuf; idx == curbuf yields pipe->buffers */
 +      capacity = min(npages,maxpages) * PAGE_SIZE - *start; /* byte limit imposed by the slot count and by maxpages */
 +
 +      return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
 +}
 +
  ssize_t iov_iter_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
        if (!maxsize)
                return 0;
  
 +      if (unlikely(i->type & ITER_PIPE))
 +              return pipe_get_pages(i, pages, maxsize, maxpages, start);
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@@ -915,37 -581,6 +914,37 @@@ static struct page **get_pages_array(si
        return p;
  }
  
 +static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
 +                 struct page ***pages, size_t maxsize,
 +                 size_t *start)
 +{
 +      struct page **p;
 +      size_t n;
 +      int idx;
 +      int npages;
 +
 +      if (!sanity(i))
 +              return -EFAULT;
 +
 +      data_start(i, &idx, start);
 +      /* some of this one + all after this one */
 +      npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
 +      n = npages * PAGE_SIZE - *start;
 +      if (maxsize > n)
 +              maxsize = n;
 +      else
 +              npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
 +      p = get_pages_array(npages);
 +      if (!p)
 +              return -ENOMEM;
 +      n = __pipe_get_pages(i, maxsize, p, idx, start);
 +      if (n > 0)
 +              *pages = p;
 +      else
 +              kvfree(p);
 +      return n;
 +}
 +
  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
                   struct page ***pages, size_t maxsize,
                   size_t *start)
        if (!maxsize)
                return 0;
  
 +      if (unlikely(i->type & ITER_PIPE))
 +              return pipe_get_pages_alloc(i, pages, maxsize, start);
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@@ -1001,10 -634,6 +1000,10 @@@ size_t csum_and_copy_from_iter(void *ad
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              WARN_ON(1);
 +              return 0;
 +      }
        iterate_and_advance(i, bytes, v, ({
                int err = 0;
                next = csum_and_copy_from_user(v.iov_base, 
@@@ -1043,10 -672,6 +1042,10 @@@ size_t csum_and_copy_to_iter(const voi
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              WARN_ON(1);     /* for now */
 +              return 0;
 +      }
        iterate_and_advance(i, bytes, v, ({
                int err = 0;
                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
@@@ -1086,20 -711,7 +1085,20 @@@ int iov_iter_npages(const struct iov_it
        if (!size)
                return 0;
  
 -      iterate_all_kinds(i, size, v, ({
 +      if (unlikely(i->type & ITER_PIPE)) {
 +              struct pipe_inode_info *pipe = i->pipe;
 +              size_t off;
 +              int idx;
 +
 +              if (!sanity(i))
 +                      return 0;
 +
 +              data_start(i, &idx, &off);
 +              /* some of this one + all after this one */
 +              npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
 +              if (npages >= maxpages)
 +                      return maxpages;
 +      } else iterate_all_kinds(i, size, v, ({
                unsigned long p = (unsigned long)v.iov_base;
                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
                        - p / PAGE_SIZE;
@@@ -1124,10 -736,6 +1123,10 @@@ EXPORT_SYMBOL(iov_iter_npages)
  const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
  {
        *new = *old;
 +      if (unlikely(new->type & ITER_PIPE)) {
 +              WARN_ON(1);
 +              return NULL;
 +      }
        if (new->type & ITER_BVEC)
                return new->bvec = kmemdup(new->bvec,
                                    new->nr_segs * sizeof(struct bio_vec),
diff --combined mm/shmem.c
@@@ -960,7 -960,7 +960,7 @@@ static int shmem_setattr(struct dentry 
        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        int error;
  
-       error = inode_change_ok(inode, attr);
+       error = setattr_prepare(dentry, attr);
        if (error)
                return error;
  
@@@ -2311,6 -2311,119 +2311,6 @@@ static ssize_t shmem_file_read_iter(str
        return retval ? retval : error;
  }
  
 -static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 -                              struct pipe_inode_info *pipe, size_t len,
 -                              unsigned int flags)
 -{
 -      struct address_space *mapping = in->f_mapping;
 -      struct inode *inode = mapping->host;
 -      unsigned int loff, nr_pages, req_pages;
 -      struct page *pages[PIPE_DEF_BUFFERS];
 -      struct partial_page partial[PIPE_DEF_BUFFERS];
 -      struct page *page;
 -      pgoff_t index, end_index;
 -      loff_t isize, left;
 -      int error, page_nr;
 -      struct splice_pipe_desc spd = {
 -              .pages = pages,
 -              .partial = partial,
 -              .nr_pages_max = PIPE_DEF_BUFFERS,
 -              .flags = flags,
 -              .ops = &page_cache_pipe_buf_ops,
 -              .spd_release = spd_release_page,
 -      };
 -
 -      isize = i_size_read(inode);
 -      if (unlikely(*ppos >= isize))
 -              return 0;
 -
 -      left = isize - *ppos;
 -      if (unlikely(left < len))
 -              len = left;
 -
 -      if (splice_grow_spd(pipe, &spd))
 -              return -ENOMEM;
 -
 -      index = *ppos >> PAGE_SHIFT;
 -      loff = *ppos & ~PAGE_MASK;
 -      req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
 -      nr_pages = min(req_pages, spd.nr_pages_max);
 -
 -      spd.nr_pages = find_get_pages_contig(mapping, index,
 -                                              nr_pages, spd.pages);
 -      index += spd.nr_pages;
 -      error = 0;
 -
 -      while (spd.nr_pages < nr_pages) {
 -              error = shmem_getpage(inode, index, &page, SGP_CACHE);
 -              if (error)
 -                      break;
 -              unlock_page(page);
 -              spd.pages[spd.nr_pages++] = page;
 -              index++;
 -      }
 -
 -      index = *ppos >> PAGE_SHIFT;
 -      nr_pages = spd.nr_pages;
 -      spd.nr_pages = 0;
 -
 -      for (page_nr = 0; page_nr < nr_pages; page_nr++) {
 -              unsigned int this_len;
 -
 -              if (!len)
 -                      break;
 -
 -              this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
 -              page = spd.pages[page_nr];
 -
 -              if (!PageUptodate(page) || page->mapping != mapping) {
 -                      error = shmem_getpage(inode, index, &page, SGP_CACHE);
 -                      if (error)
 -                              break;
 -                      unlock_page(page);
 -                      put_page(spd.pages[page_nr]);
 -                      spd.pages[page_nr] = page;
 -              }
 -
 -              isize = i_size_read(inode);
 -              end_index = (isize - 1) >> PAGE_SHIFT;
 -              if (unlikely(!isize || index > end_index))
 -                      break;
 -
 -              if (end_index == index) {
 -                      unsigned int plen;
 -
 -                      plen = ((isize - 1) & ~PAGE_MASK) + 1;
 -                      if (plen <= loff)
 -                              break;
 -
 -                      this_len = min(this_len, plen - loff);
 -                      len = this_len;
 -              }
 -
 -              spd.partial[page_nr].offset = loff;
 -              spd.partial[page_nr].len = this_len;
 -              len -= this_len;
 -              loff = 0;
 -              spd.nr_pages++;
 -              index++;
 -      }
 -
 -      while (page_nr < nr_pages)
 -              put_page(spd.pages[page_nr++]);
 -
 -      if (spd.nr_pages)
 -              error = splice_to_pipe(pipe, &spd);
 -
 -      splice_shrink_spd(&spd);
 -
 -      if (error > 0) {
 -              *ppos += error;
 -              file_accessed(in);
 -      }
 -      return error;
 -}
 -
  /*
   * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
   */
@@@ -3673,7 -3786,7 +3673,7 @@@ static const struct file_operations shm
        .read_iter      = shmem_file_read_iter,
        .write_iter     = generic_file_write_iter,
        .fsync          = noop_fsync,
 -      .splice_read    = shmem_file_splice_read,
 +      .splice_read    = generic_file_splice_read,
        .splice_write   = iter_file_splice_write,
        .fallocate      = shmem_fallocate,
  #endif
@@@ -3965,7 -4078,7 +3965,7 @@@ EXPORT_SYMBOL_GPL(shmem_truncate_range)
  
  /* common code */
  
 -static struct dentry_operations anon_ops = {
 +static const struct dentry_operations anon_ops = {
        .d_dname = simple_dname
  };