Merge tag 'nfsd-4.6-1' of git://linux-nfs.org/~bfields/linux

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 25 Mar 2016 02:50:32 +0000 (19:50 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 25 Mar 2016 02:50:32 +0000 (19:50 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2016 02:50:32 +0000 (19:50 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2016 02:50:32 +0000 (19:50 -0700)
diff --git a/Documentation/filesystems/nfs/pnfs-scsi-server.txt b/Documentation/filesystems/nfs/pnfs-scsi-server.txt

new file mode 100644 (file)

index 0000000..5bef726
--- /dev/null
+++ b/Documentation/filesystems/nfs/pnfs-scsi-server.txt
@@ -0,0 +1,23 @@
+
+pNFS SCSI layout server user guide
+==================================
+
+This document describes support for pNFS SCSI layouts in the Linux NFS server.
+With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS,
+which in addition to handling all the metadata access to the NFS export,
+also hands out layouts to the clients so that they can directly access the
+underlying SCSI LUNs that are shared with the client.
+
+To use pNFS SCSI layouts with the Linux NFS server, the exported file
+system needs to support the pNFS SCSI layouts (currently just XFS), and the
+file system must sit on a SCSI LUN that is accessible to the clients in
+addition to the MDS.  As of now the file system needs to sit directly on the
+exported LUN; striping or concatenation of LUNs on the MDS and clients
+is not supported yet.
+
+On a server built with CONFIG_NFSD_SCSILAYOUT, the pNFS SCSI volume support is
+automatically enabled if the file system is exported using the "pnfs"
+option and the underlying SCSI device supports persistent reservations.
+On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
+enabled, and the file system is mounted using the NFSv4.1 protocol
+version (mount -o vers=4.1).
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c

index 8bc870e..02e4d87 100644 (file)
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -446,8 +446,8 @@ static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
         kfree(bl);
  }
  
-static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
-                                                  gfp_t gfp_flags)
+static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
+               gfp_t gfp_flags, bool is_scsi_layout)
  {
         struct pnfs_block_layout *bl;
  
@@ -460,9 +460,22 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
         bl->bl_ext_ro = RB_ROOT;
         spin_lock_init(&bl->bl_ext_lock);
  
+       bl->bl_scsi_layout = is_scsi_layout;
         return &bl->bl_layout;
  }
  
+static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
+                                                  gfp_t gfp_flags)
+{
+       return __bl_alloc_layout_hdr(inode, gfp_flags, false);
+}
+
+static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
+                                                  gfp_t gfp_flags)
+{
+       return __bl_alloc_layout_hdr(inode, gfp_flags, true);
+}
+
  static void bl_free_lseg(struct pnfs_layout_segment *lseg)
  {
         dprintk("%s enter\n", __func__);
@@ -889,22 +902,53 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
         .sync                           = pnfs_generic_sync,
  };
  
+static struct pnfs_layoutdriver_type scsilayout_type = {
+       .id                             = LAYOUT_SCSI,
+       .name                           = "LAYOUT_SCSI",
+       .owner                          = THIS_MODULE,
+       .flags                          = PNFS_LAYOUTRET_ON_SETATTR |
+                                         PNFS_READ_WHOLE_PAGE,
+       .read_pagelist                  = bl_read_pagelist,
+       .write_pagelist                 = bl_write_pagelist,
+       .alloc_layout_hdr               = sl_alloc_layout_hdr,
+       .free_layout_hdr                = bl_free_layout_hdr,
+       .alloc_lseg                     = bl_alloc_lseg,
+       .free_lseg                      = bl_free_lseg,
+       .return_range                   = bl_return_range,
+       .prepare_layoutcommit           = bl_prepare_layoutcommit,
+       .cleanup_layoutcommit           = bl_cleanup_layoutcommit,
+       .set_layoutdriver               = bl_set_layoutdriver,
+       .alloc_deviceid_node            = bl_alloc_deviceid_node,
+       .free_deviceid_node             = bl_free_deviceid_node,
+       .pg_read_ops                    = &bl_pg_read_ops,
+       .pg_write_ops                   = &bl_pg_write_ops,
+       .sync                           = pnfs_generic_sync,
+};
+
+
  static int __init nfs4blocklayout_init(void)
  {
         int ret;
  
         dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
  
-       ret = pnfs_register_layoutdriver(&blocklayout_type);
+       ret = bl_init_pipefs();
         if (ret)
                 goto out;
-       ret = bl_init_pipefs();
+
+       ret = pnfs_register_layoutdriver(&blocklayout_type);
         if (ret)
-               goto out_unregister;
+               goto out_cleanup_pipe;
+
+       ret = pnfs_register_layoutdriver(&scsilayout_type);
+       if (ret)
+               goto out_unregister_block;
         return 0;
  
-out_unregister:
+out_unregister_block:
         pnfs_unregister_layoutdriver(&blocklayout_type);
+out_cleanup_pipe:
+       bl_cleanup_pipefs();
  out:
         return ret;
  }
@@ -914,8 +958,9 @@ static void __exit nfs4blocklayout_exit(void)
         dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
                __func__);
  
-       bl_cleanup_pipefs();
+       pnfs_unregister_layoutdriver(&scsilayout_type);
         pnfs_unregister_layoutdriver(&blocklayout_type);
+       bl_cleanup_pipefs();
  }
  
  MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h

index c556640..bc21205 100644 (file)
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -55,7 +55,6 @@ struct pnfs_block_dev;
   */
  #define PNFS_BLOCK_UUID_LEN    128
  
-
  struct pnfs_block_volume {
         enum pnfs_block_volume_type     type;
         union {
@@ -82,6 +81,13 @@ struct pnfs_block_volume {
                         u32             volumes_count;
                         u32             volumes[PNFS_BLOCK_MAX_DEVICES];
                 } stripe;
+               struct {
+                       enum scsi_code_set              code_set;
+                       enum scsi_designator_type       designator_type;
+                       int                             designator_len;
+                       u8                              designator[256];
+                       u64                             pr_key;
+               } scsi;
         };
  };
  
@@ -106,6 +112,9 @@ struct pnfs_block_dev {
         struct block_device             *bdev;
         u64                             disk_offset;
  
+       u64                             pr_key;
+       bool                            pr_registered;
+
         bool (*map)(struct pnfs_block_dev *dev, u64 offset,
                         struct pnfs_block_dev_map *map);
  };
@@ -131,6 +140,7 @@ struct pnfs_block_layout {
         struct rb_root          bl_ext_rw;
         struct rb_root          bl_ext_ro;
         spinlock_t              bl_ext_lock;   /* Protects list manipulation */
+       bool                    bl_scsi_layout;
  };
  
  static inline struct pnfs_block_layout *
@@ -182,6 +192,6 @@ void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
  dev_t bl_resolve_deviceid(struct nfs_server *server,
                 struct pnfs_block_volume *b, gfp_t gfp_mask);
  int __init bl_init_pipefs(void);
-void __exit bl_cleanup_pipefs(void);
+void bl_cleanup_pipefs(void);
  
  #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c

index a861bbd..e5b8967 100644 (file)
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -1,11 +1,12 @@
  /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
   */
  #include <linux/sunrpc/svc.h>
  #include <linux/blkdev.h>
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
  #include <linux/nfs_xdr.h>
+#include <linux/pr.h>
  
  #include "blocklayout.h"
  
@@ -21,6 +22,17 @@ bl_free_device(struct pnfs_block_dev *dev)
                         bl_free_device(&dev->children[i]);
                 kfree(dev->children);
         } else {
+               if (dev->pr_registered) {
+                       const struct pr_ops *ops =
+                               dev->bdev->bd_disk->fops->pr_ops;
+                       int error;
+
+                       error = ops->pr_register(dev->bdev, dev->pr_key, 0,
+                               false);
+                       if (error)
+                               pr_err("failed to unregister PR key.\n");
+               }
+
                 if (dev->bdev)
                         blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
         }
@@ -113,6 +125,24 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
                 for (i = 0; i < b->stripe.volumes_count; i++)
                         b->stripe.volumes[i] = be32_to_cpup(p++);
                 break;
+       case PNFS_BLOCK_VOLUME_SCSI:
+               p = xdr_inline_decode(xdr, 4 + 4 + 4);
+               if (!p)
+                       return -EIO;
+               b->scsi.code_set = be32_to_cpup(p++);
+               b->scsi.designator_type = be32_to_cpup(p++);
+               b->scsi.designator_len = be32_to_cpup(p++);
+               p = xdr_inline_decode(xdr, b->scsi.designator_len);
+               if (!p)
+                       return -EIO;
+               if (b->scsi.designator_len > 256)
+                       return -EIO;
+               memcpy(&b->scsi.designator, p, b->scsi.designator_len);
+               p = xdr_inline_decode(xdr, 8);
+               if (!p)
+                       return -EIO;
+               p = xdr_decode_hyper(p, &b->scsi.pr_key);
+               break;
         default:
                 dprintk("unknown volume type!\n");
                 return -EIO;
@@ -216,6 +246,116 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
         return 0;
  }
  
+/*
+ * Check whether the device designator of a SCSI volume is one that
+ * bl_parse_scsi() can turn into a device path.
+ *
+ * Only binary-coded PS_DESIGNATOR_EUI64 (8, 12 or 16 bytes) and
+ * PS_DESIGNATOR_NAA (8 or 16 bytes) designators are accepted.
+ * PS_DESIGNATOR_T10 and PS_DESIGNATOR_NAME are logged as unsupported,
+ * every other designator type is logged as invalid.
+ *
+ * Returns true if the designator is acceptable, false otherwise.
+ */
+static bool
+bl_validate_designator(struct pnfs_block_volume *v)
+{
+       switch (v->scsi.designator_type) {
+       case PS_DESIGNATOR_EUI64:
+               if (v->scsi.code_set != PS_CODE_SET_BINARY)
+                       return false;
+
+               /*
+                * EUI-64 based designators are 8, 12 or 16 bytes long.
+                * These are the lengths bl_parse_scsi() knows how to
+                * format, so anything else must be rejected here.
+                */
+               if (v->scsi.designator_len != 8 &&
+                   v->scsi.designator_len != 12 &&
+                   v->scsi.designator_len != 16)
+                       return false;
+
+               return true;
+       case PS_DESIGNATOR_NAA:
+               if (v->scsi.code_set != PS_CODE_SET_BINARY)
+                       return false;
+
+               if (v->scsi.designator_len != 8 &&
+                   v->scsi.designator_len != 16)
+                       return false;
+
+               return true;
+       case PS_DESIGNATOR_T10:
+       case PS_DESIGNATOR_NAME:
+               pr_err("pNFS: unsupported designator "
+                       "(code set %d, type %d, len %d.\n",
+                       v->scsi.code_set,
+                       v->scsi.designator_type,
+                       v->scsi.designator_len);
+               return false;
+       default:
+               pr_err("pNFS: invalid designator "
+                       "(code set %d, type %d, len %d.\n",
+                       v->scsi.code_set,
+                       v->scsi.designator_type,
+                       v->scsi.designator_len);
+               return false;
+       }
+}
+
+/*
+ * Set up @d for the SCSI volume volumes[idx].
+ *
+ * The volume's designator is turned into a /dev/disk/by-id/wwn-0x...
+ * path, the block device behind that path is opened, and the
+ * reservation key handed out by the server is registered with the
+ * device through its pr_ops.
+ *
+ * @d is only modified once everything has succeeded, so on failure
+ * d->bdev never holds an ERR_PTR or a reference that was already
+ * dropped (bl_free_device() puts any non-NULL d->bdev).
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int
+bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
+               struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+       struct pnfs_block_volume *v = &volumes[idx];
+       struct block_device *bdev;
+       const struct pr_ops *ops;
+       const char *devname;
+       int error;
+
+       if (!bl_validate_designator(v))
+               return -EINVAL;
+
+       /* %*phN needs a constant length, hence one format per size. */
+       switch (v->scsi.designator_len) {
+       case 8:
+               devname = kasprintf(gfp_mask, "/dev/disk/by-id/wwn-0x%8phN",
+                               v->scsi.designator);
+               break;
+       case 12:
+               devname = kasprintf(gfp_mask, "/dev/disk/by-id/wwn-0x%12phN",
+                               v->scsi.designator);
+               break;
+       case 16:
+               devname = kasprintf(gfp_mask, "/dev/disk/by-id/wwn-0x%16phN",
+                               v->scsi.designator);
+               break;
+       default:
+               return -EINVAL;
+       }
+       if (!devname)
+               return -ENOMEM;
+
+       /*
+        * Open with the same mode bl_free_device() uses for the final
+        * blkdev_put(), so that get and put are balanced.
+        */
+       bdev = blkdev_get_by_path(devname, FMODE_READ | FMODE_WRITE, NULL);
+       if (IS_ERR(bdev)) {
+               pr_warn("pNFS: failed to open device %s (%ld)\n",
+                       devname, PTR_ERR(bdev));
+               kfree(devname);
+               return PTR_ERR(bdev);
+       }
+
+       kfree(devname);
+
+       pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
+               bdev->bd_disk->disk_name, v->scsi.pr_key);
+
+       ops = bdev->bd_disk->fops->pr_ops;
+       if (!ops) {
+               pr_err("pNFS: block device %s does not support reservations.\n",
+                               bdev->bd_disk->disk_name);
+               error = -EINVAL;
+               goto out_blkdev_put;
+       }
+
+       error = ops->pr_register(bdev, 0, v->scsi.pr_key, true);
+       if (error) {
+               pr_err("pNFS: failed to register key for block device %s.\n",
+                               bdev->bd_disk->disk_name);
+               goto out_blkdev_put;
+       }
+
+       /* Everything worked: publish the device state in one go. */
+       d->bdev = bdev;
+       d->len = i_size_read(bdev->bd_inode);
+       d->map = bl_map_simple;
+       d->pr_key = v->scsi.pr_key;
+       d->pr_registered = true;
+       return 0;
+
+out_blkdev_put:
+       blkdev_put(bdev, FMODE_READ | FMODE_WRITE);
+       return error;
+}
+
  static int
  bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
                 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -303,6 +443,8 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
                 return bl_parse_concat(server, d, volumes, idx, gfp_mask);
         case PNFS_BLOCK_VOLUME_STRIPE:
                 return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
+       case PNFS_BLOCK_VOLUME_SCSI:
+               return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
         default:
                 dprintk("unsupported volume type: %d\n", volumes[idx].type);
                 return -EIO;
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c

index 35ab51c..720b3ff 100644 (file)
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
   */
  
  #include <linux/vmalloc.h>
@@ -462,10 +462,12 @@ out:
         return err;
  }
  
-static size_t ext_tree_layoutupdate_size(size_t count)
+static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
  {
-       return sizeof(__be32) /* number of entries */ +
-               PNFS_BLOCK_EXTENT_SIZE * count;
+       if (bl->bl_scsi_layout)
+               return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
+       else
+               return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
  }
  
  static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
@@ -483,6 +485,23 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
         }
  }
  
+static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
+{
+       p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
+                       NFS4_DEVICEID4_SIZE);
+       p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+       p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+       p = xdr_encode_hyper(p, 0LL);
+       *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
+       return p;
+}
+
+static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
+{
+       p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+       return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+}
+
  static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
                 size_t buffer_size, size_t *count)
  {
@@ -496,19 +515,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
                         continue;
  
                 (*count)++;
-               if (ext_tree_layoutupdate_size(*count) > buffer_size) {
+               if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
                         /* keep counting.. */
                         ret = -ENOSPC;
                         continue;
                 }
  
-               p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
-                               NFS4_DEVICEID4_SIZE);
-               p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
-               p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
-               p = xdr_encode_hyper(p, 0LL);
-               *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
-
+               if (bl->bl_scsi_layout)
+                       p = encode_scsi_range(be, p);
+               else
+                       p = encode_block_extent(be, p);
                 be->be_tag = EXTENT_COMMITTING;
         }
         spin_unlock(&bl->bl_ext_lock);
@@ -537,7 +553,7 @@ retry:
         if (unlikely(ret)) {
                 ext_tree_free_commitdata(arg, buffer_size);
  
-               buffer_size = ext_tree_layoutupdate_size(count);
+               buffer_size = ext_tree_layoutupdate_size(bl, count);
                 count = 0;
  
                 arg->layoutupdate_pages =
@@ -556,7 +572,7 @@ retry:
         }
  
         *start_p = cpu_to_be32(count);
-       arg->layoutupdate_len = ext_tree_layoutupdate_size(count);
+       arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
  
         if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
                 void *p = start_p, *end = p + arg->layoutupdate_len;
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c

index dbe5839..9fb067a 100644 (file)
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -281,7 +281,7 @@ out:
         return ret;
  }
  
-void __exit bl_cleanup_pipefs(void)
+void bl_cleanup_pipefs(void)
  {
         rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
         unregister_pernet_subsys(&nfs4blocklayout_net_ops);
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig

index a0b77fc..c9f583d 100644 (file)
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -84,12 +84,30 @@ config NFSD_V4
           If unsure, say N.
  
  config NFSD_PNFS
-       bool "NFSv4.1 server support for Parallel NFS (pNFS)"
-       depends on NFSD_V4
+       bool
+
+config NFSD_BLOCKLAYOUT
+       bool "NFSv4.1 server support for pNFS block layouts"
+       depends on NFSD_V4 && BLOCK
+       select NFSD_PNFS
+       help
+         This option enables support for exporting pNFS block layouts
+         in the kernel's NFS server. The pNFS block layout enables NFS
+         clients to directly perform I/O to block devices accessible to both
+         the server and the clients.  See RFC 5663 for more details.
+
+         If unsure, say N.
+
+config NFSD_SCSILAYOUT
+       bool "NFSv4.1 server support for pNFS SCSI layouts"
+       depends on NFSD_V4 && BLOCK
+       select NFSD_PNFS
         help
-         This option enables support for the parallel NFS features of the
-         minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
-         server.
+         This option enables support for exporting pNFS SCSI layouts
+         in the kernel's NFS server. The pNFS SCSI layout enables NFS
+         clients to directly perform I/O to SCSI devices accessible to both
+         the server and the clients.  See draft-ietf-nfsv4-scsi-layout for
+         more details.
  
           If unsure, say N.
  
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile

index 9a6028e..3ae5f3c 100644 (file)
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -17,4 +17,6 @@ nfsd-$(CONFIG_NFSD_V3)        += nfs3proc.o nfs3xdr.o
  nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
  nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
                            nfs4acl.o nfs4callback.o nfs4recover.o
-nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
+nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c

index c29d942..e55b524 100644 (file)
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -1,11 +1,14 @@
  /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
   */
  #include <linux/exportfs.h>
  #include <linux/genhd.h>
  #include <linux/slab.h>
+#include <linux/pr.h>
  
  #include <linux/nfsd/debug.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_common.h>
  
  #include "blocklayoutxdr.h"
  #include "pnfs.h"
@@ -13,37 +16,6 @@
  #define NFSDDBG_FACILITY       NFSDDBG_PNFS
  
  
-static int
-nfsd4_block_get_device_info_simple(struct super_block *sb,
-               struct nfsd4_getdeviceinfo *gdp)
-{
-       struct pnfs_block_deviceaddr *dev;
-       struct pnfs_block_volume *b;
-
-       dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
-                     sizeof(struct pnfs_block_volume), GFP_KERNEL);
-       if (!dev)
-               return -ENOMEM;
-       gdp->gd_device = dev;
-
-       dev->nr_volumes = 1;
-       b = &dev->volumes[0];
-
-       b->type = PNFS_BLOCK_VOLUME_SIMPLE;
-       b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
-       return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
-                       &b->simple.offset);
-}
-
-static __be32
-nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
-               struct nfsd4_getdeviceinfo *gdp)
-{
-       if (sb->s_bdev != sb->s_bdev->bd_contains)
-               return nfserr_inval;
-       return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
-}
-
  static __be32
  nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
                 struct nfsd4_layoutget *args)
@@ -141,20 +113,13 @@ out_layoutunavailable:
  }
  
  static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
-               struct nfsd4_layoutcommit *lcp)
+nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
+               struct iomap *iomaps, int nr_iomaps)
  {
         loff_t new_size = lcp->lc_last_wr + 1;
         struct iattr iattr = { .ia_valid = 0 };
-       struct iomap *iomaps;
-       int nr_iomaps;
         int error;
  
-       nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
-                       lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
-       if (nr_iomaps < 0)
-               return nfserrno(nr_iomaps);
-
         if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
             timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
                 lcp->lc_mtime = current_fs_time(inode->i_sb);
@@ -172,6 +137,54 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
         return nfserrno(error);
  }
  
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
+static int
+nfsd4_block_get_device_info_simple(struct super_block *sb,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       struct pnfs_block_deviceaddr *dev;
+       struct pnfs_block_volume *b;
+
+       dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+                     sizeof(struct pnfs_block_volume), GFP_KERNEL);
+       if (!dev)
+               return -ENOMEM;
+       gdp->gd_device = dev;
+
+       dev->nr_volumes = 1;
+       b = &dev->volumes[0];
+
+       b->type = PNFS_BLOCK_VOLUME_SIMPLE;
+       b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
+       return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
+                       &b->simple.offset);
+}
+
+static __be32
+nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
+               struct nfs4_client *clp,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       if (sb->s_bdev != sb->s_bdev->bd_contains)
+               return nfserr_inval;
+       return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
+}
+
+static __be32
+nfsd4_block_proc_layoutcommit(struct inode *inode,
+               struct nfsd4_layoutcommit *lcp)
+{
+       struct iomap *iomaps;
+       int nr_iomaps;
+
+       nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+                       lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+       if (nr_iomaps < 0)
+               return nfserrno(nr_iomaps);
+
+       return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
  const struct nfsd4_layout_ops bl_layout_ops = {
         /*
          * Pretend that we send notification to the client.  This is a blatant
@@ -190,3 +203,206 @@ const struct nfsd4_layout_ops bl_layout_ops = {
         .encode_layoutget       = nfsd4_block_encode_layoutget,
         .proc_layoutcommit      = nfsd4_block_proc_layoutcommit,
  };
+#endif /* CONFIG_NFSD_BLOCKLAYOUT */
+
+#ifdef CONFIG_NFSD_SCSILAYOUT
+/*
+ * Fill in the SCSI designator of @b by sending an INQUIRY for VPD
+ * page 0x83 to @bdev and scanning the returned descriptors.
+ *
+ * Only descriptors with association 0 and type 0x02 (recorded as
+ * PS_DESIGNATOR_EUI64) or 0x03 (recorded as PS_DESIGNATOR_NAA) and a
+ * length of 8, 12 or 16 bytes are considered.  A 16 byte descriptor
+ * ends the scan; otherwise the last matching descriptor is used.
+ *
+ * Returns 0 if a designator was stored in @b.  Returns -ENOMEM on
+ * allocation failure, -EIO if the response length is inconsistent,
+ * -EINVAL if no usable designator was found, or the error reported by
+ * blk_rq_map_kern()/blk_execute_rq().
+ */
+static int nfsd4_scsi_identify_device(struct block_device *bdev,
+               struct pnfs_block_volume *b)
+{
+       struct request_queue *q = bdev->bd_disk->queue;
+       struct request *rq;
+       size_t bufflen = 252, len, id_len;
+       u8 *buf, *d, type, assoc;
+       bool found = false;
+       int error;
+
+       buf = kzalloc(bufflen, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       rq = blk_get_request(q, READ, GFP_KERNEL);
+       if (IS_ERR(rq)) {
+               error = -ENOMEM;
+               goto out_free_buf;
+       }
+       blk_rq_set_block_pc(rq);
+
+       error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
+       if (error)
+               goto out_put_request;
+
+       /* INQUIRY with EVPD set, page code 0x83, allocation length. */
+       rq->cmd[0] = INQUIRY;
+       rq->cmd[1] = 1;
+       rq->cmd[2] = 0x83;
+       rq->cmd[3] = bufflen >> 8;
+       rq->cmd[4] = bufflen & 0xff;
+       rq->cmd_len = COMMAND_SIZE(INQUIRY);
+
+       error = blk_execute_rq(rq->q, NULL, rq, 1);
+       if (error) {
+               pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
+                       rq->errors);
+               goto out_put_request;
+       }
+
+       /* Page length lives in bytes 2-3 and excludes the 4 byte header. */
+       len = (buf[2] << 8) + buf[3] + 4;
+       if (len > bufflen) {
+               pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
+                       len);
+               /* error is still 0 here; do not report success. */
+               error = -EIO;
+               goto out_put_request;
+       }
+
+       /*
+        * Each descriptor has a 4 byte header followed by id_len bytes.
+        * Never look at a header or payload that is not fully inside
+        * the response, the lengths come from the device.
+        */
+       for (d = buf + 4; d + 4 <= buf + len; d += id_len + 4) {
+               id_len = d[3];
+               if (d + 4 + id_len > buf + len)
+                       break;
+
+               type = d[1] & 0xf;
+               assoc = (d[1] >> 4) & 0x3;
+
+               /*
+                * We only care about EUI-64 and NAA designator types
+                * with LU association.
+                */
+               if (assoc != 0x00)
+                       continue;
+               if (type != 0x02 && type != 0x03)
+                       continue;
+               if (id_len != 8 && id_len != 12 && id_len != 16)
+                       continue;
+
+               b->scsi.code_set = PS_CODE_SET_BINARY;
+               b->scsi.designator_type = type == 0x02 ?
+                       PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
+               b->scsi.designator_len = id_len;
+               memcpy(b->scsi.designator, d + 4, id_len);
+               found = true;
+
+               /*
+                * If we found an 8 or 12 byte descriptor continue on to
+                * see if a 16 byte one is available.  If we find a
+                * 16 byte descriptor we're done.
+                */
+               if (id_len == 16)
+                       break;
+       }
+
+       if (!found) {
+               pr_err("pNFS: no usable EUI-64 or NAA designator found\n");
+               error = -EINVAL;
+       }
+
+out_put_request:
+       blk_put_request(rq);
+out_free_buf:
+       kfree(buf);
+       return error;
+}
+
+#define NFSD_MDS_PR_KEY                0x0100000000000000
+
+/*
+ * We use the client ID as a unique key for the reservations.
+ * This allows us to easily fence a client when recalls fail.
+ */
+static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
+{
+       return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
+}
+
+static int
+nfsd4_block_get_device_info_scsi(struct super_block *sb,
+               struct nfs4_client *clp,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       struct pnfs_block_deviceaddr *dev;
+       struct pnfs_block_volume *b;
+       const struct pr_ops *ops;
+       int error;
+
+       dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+                     sizeof(struct pnfs_block_volume), GFP_KERNEL);
+       if (!dev)
+               return -ENOMEM;
+       gdp->gd_device = dev;
+
+       dev->nr_volumes = 1;
+       b = &dev->volumes[0];
+
+       b->type = PNFS_BLOCK_VOLUME_SCSI;
+       b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
+
+       error = nfsd4_scsi_identify_device(sb->s_bdev, b);
+       if (error)
+               return error;
+
+       ops = sb->s_bdev->bd_disk->fops->pr_ops;
+       if (!ops) {
+               pr_err("pNFS: device %s does not support PRs.\n",
+                       sb->s_id);
+               return -EINVAL;
+       }
+
+       error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
+       if (error) {
+               pr_err("pNFS: failed to register key for device %s.\n",
+                       sb->s_id);
+               return -EINVAL;
+       }
+
+       error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
+                       PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
+       if (error) {
+               pr_err("pNFS: failed to reserve device %s.\n",
+                       sb->s_id);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static __be32
+nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
+               struct nfs4_client *clp,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       if (sb->s_bdev != sb->s_bdev->bd_contains)
+               return nfserr_inval;
+       return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
+}
+static __be32
+nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+               struct nfsd4_layoutcommit *lcp)
+{
+       struct iomap *iomaps;
+       int nr_iomaps;
+
+       nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+                       lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+       if (nr_iomaps < 0)
+               return nfserrno(nr_iomaps);
+
+       return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
+static void
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+{
+       struct nfs4_client *clp = ls->ls_stid.sc_client;
+       struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+
+       bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
+                       nfsd4_scsi_pr_key(clp), 0, true);
+}
+
+const struct nfsd4_layout_ops scsi_layout_ops = {
+       /*
+        * Pretend that we send notification to the client.  This is a blatant
+        * lie to force recent Linux clients to cache our device IDs.
+        * We rarely ever change the device ID, so the harm of leaking deviceids
+        * for a while isn't too bad.  Unfortunately RFC5661 is a complete mess
+        * in this regard, but I filed errata 4119 for this a while ago, and
+        * hopefully the Linux client will eventually start caching deviceids
+        * without this again.
+        */
+       .notify_types           =
+                       NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
+       .proc_getdeviceinfo     = nfsd4_scsi_proc_getdeviceinfo,
+       .encode_getdeviceinfo   = nfsd4_block_encode_getdeviceinfo,
+       .proc_layoutget         = nfsd4_block_proc_layoutget,
+       .encode_layoutget       = nfsd4_block_encode_layoutget,
+       .proc_layoutcommit      = nfsd4_scsi_proc_layoutcommit,
+       .fence_client           = nfsd4_scsi_fence_client,
+};
+#endif /* CONFIG_NFSD_SCSILAYOUT */
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c

index 6d834dc..6c3b316 100644 (file)
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
   */
  #include <linux/sunrpc/svc.h>
  #include <linux/exportfs.h>
@@ -53,6 +53,18 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
                 p = xdr_encode_hyper(p, b->simple.offset);
                 p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
                 break;
+       case PNFS_BLOCK_VOLUME_SCSI:
+               len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8;
+               p = xdr_reserve_space(xdr, len);
+               if (!p)
+                       return -ETOOSMALL;
+
+               *p++ = cpu_to_be32(b->type);
+               *p++ = cpu_to_be32(b->scsi.code_set);
+               *p++ = cpu_to_be32(b->scsi.designator_type);
+               p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len);
+               p = xdr_encode_hyper(p, b->scsi.pr_key);
+               break;
         default:
                 return -ENOTSUPP;
         }
@@ -93,18 +105,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
                 u32 block_size)
  {
         struct iomap *iomaps;
-       u32 nr_iomaps, expected, i;
+       u32 nr_iomaps, i;
  
         if (len < sizeof(u32)) {
                 dprintk("%s: extent array too small: %u\n", __func__, len);
                 return -EINVAL;
         }
+       len -= sizeof(u32);
+       if (len % PNFS_BLOCK_EXTENT_SIZE) {
+               dprintk("%s: extent array invalid: %u\n", __func__, len);
+               return -EINVAL;
+       }
  
         nr_iomaps = be32_to_cpup(p++);
-       expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
-       if (len != expected) {
+       if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
                 dprintk("%s: extent array size mismatch: %u/%u\n",
-                       __func__, len, expected);
+                       __func__, len, nr_iomaps);
                 return -EINVAL;
         }
  
@@ -155,3 +171,54 @@ fail:
         kfree(iomaps);
         return -EINVAL;
  }
+
+/*
+ * Decode the layoutupdate body sent with a SCSI layout LAYOUTCOMMIT.
+ *
+ * @p:          XDR buffer holding a 32-bit range count followed by that
+ *              many ranges, each a 64-bit offset and a 64-bit length
+ * @len:        size of the buffer at @p in bytes
+ * @iomapp:     on success, set to a kcalloc()ed array with one entry per
+ *              range, with ->offset and ->length filled in
+ * @block_size: every offset and length must be a multiple of this; the
+ *              mask test below assumes it is a power of two
+ *
+ * Returns the number of ranges decoded, -EINVAL if the buffer is malformed
+ * or a value is not block aligned, or -ENOMEM if the array cannot be
+ * allocated.  *@iomapp is only written on success.  The array is not freed
+ * here on success; presumably the caller owns it -- TODO confirm.
+ */
+int
+nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+               u32 block_size)
+{
+       struct iomap *iomaps;
+       u32 nr_iomaps, i;
+
+       if (len < sizeof(u32)) {
+               dprintk("%s: extent array too small: %u\n", __func__, len);
+               return -EINVAL;
+       }
+
+       /*
+        * Validate the payload length on its own and derive the range
+        * count from it, instead of multiplying the client supplied count:
+        * that product can wrap when stored in 32 bits and then match a
+        * much smaller @len.  This is the same scheme that
+        * nfsd4_block_decode_layoutupdate() uses.
+        */
+       len -= sizeof(u32);
+       if (len % PNFS_SCSI_RANGE_SIZE) {
+               dprintk("%s: extent array invalid: %u\n", __func__, len);
+               return -EINVAL;
+       }
+
+       nr_iomaps = be32_to_cpup(p++);
+       if (nr_iomaps != len / PNFS_SCSI_RANGE_SIZE) {
+               dprintk("%s: extent array size mismatch: %u/%u\n",
+                       __func__, len, nr_iomaps);
+               return -EINVAL;
+       }
+
+       iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+       if (!iomaps) {
+               dprintk("%s: failed to allocate extent array\n", __func__);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < nr_iomaps; i++) {
+               u64 val;
+
+               /* first hyper of a range: the offset */
+               p = xdr_decode_hyper(p, &val);
+               if (val & (block_size - 1)) {
+                       dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
+                       goto fail;
+               }
+               iomaps[i].offset = val;
+
+               /* second hyper of a range: the length */
+               p = xdr_decode_hyper(p, &val);
+               if (val & (block_size - 1)) {
+                       dprintk("%s: unaligned length 0x%llx\n", __func__, val);
+                       goto fail;
+               }
+               iomaps[i].length = val;
+       }
+
+       *iomapp = iomaps;
+       return nr_iomaps;
+fail:
+       kfree(iomaps);
+       return -EINVAL;
+}
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h

index 6de925f..397bc75 100644 (file)
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -15,6 +15,11 @@ struct pnfs_block_extent {
         enum pnfs_block_extent_state    es;
  };
  
+/*
+ * A pair of 64-bit values, foff and len.
+ * NOTE(review): presumably a file offset and a byte length describing one
+ * range of a SCSI layout; no user of this struct is visible in this part
+ * of the change -- confirm against the callers.
+ */
+struct pnfs_block_range {
+       u64                             foff;
+       u64                             len;
+};
+
  /*
   * Random upper cap for the uuid length to avoid unbounded allocation.
   * Not actually limited by the protocol.
@@ -29,6 +34,13 @@ struct pnfs_block_volume {
                         u32             sig_len;
                         u8              sig[PNFS_BLOCK_UUID_LEN];
                 } simple;
+               struct {
+                       enum scsi_code_set              code_set;
+                       enum scsi_designator_type       designator_type;
+                       int                             designator_len;
+                       u8                              designator[256];
+                       u64                             pr_key;
+               } scsi;
         };
  };
  
@@ -43,5 +55,7 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
                 struct nfsd4_layoutget *lgp);
  int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
                 u32 block_size);
+int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+               u32 block_size);
  
  #endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c

index 7b755b7..51c3b06 100644 (file)
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
  {
         __be32  nfserr;
         u32     max_blocksize = svc_max_payload(rqstp);
+       unsigned long cnt = min(argp->count, max_blocksize);
  
         dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
                                 SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
          * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
          * + 1 (xdr opaque byte count) = 26
          */
-       resp->count = min(argp->count, max_blocksize);
+       resp->count = cnt;
         svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
  
         fh_copy(&resp->fh, &argp->fh);
@@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
                                   &resp->count);
         if (nfserr == 0) {
                 struct inode    *inode = d_inode(resp->fh.fh_dentry);
-
-               resp->eof = (argp->offset + resp->count) >= inode->i_size;
+               resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
+                                                       inode->i_size);
         }
  
         RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c

index ce2d010..825c7bc 100644 (file)
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -1,6 +1,7 @@
  /*
   * Copyright (c) 2014 Christoph Hellwig.
   */
+#include <linux/blkdev.h>
  #include <linux/kmod.h>
  #include <linux/file.h>
  #include <linux/jhash.h>
@@ -26,7 +27,12 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
  static const struct lock_manager_operations nfsd4_layouts_lm_ops;
  
  const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] =  {
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
         [LAYOUT_BLOCK_VOLUME]   = &bl_layout_ops,
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+       [LAYOUT_SCSI]           = &scsi_layout_ops,
+#endif
  };
  
  /* pNFS device ID to export fsid mapping */
@@ -121,10 +127,24 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
         if (!(exp->ex_flags & NFSEXP_PNFS))
                 return;
  
+       /*
+        * Check if the file system supports exporting a block-like layout.
+        * If the block device supports reservations prefer the SCSI layout,
+        * otherwise advertise the block layout.
+        */
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
         if (sb->s_export_op->get_uuid &&
             sb->s_export_op->map_blocks &&
             sb->s_export_op->commit_blocks)
                 exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+       /* overwrite block layout selection if needed */
+       if (sb->s_export_op->map_blocks &&
+           sb->s_export_op->commit_blocks &&
+           sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
+               exp->ex_layout_type = LAYOUT_SCSI;
+#endif
  }
  
  static void
@@ -590,8 +610,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
  
         rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
  
-       trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
         printk(KERN_WARNING
                 "nfsd: client %s failed to respond to layout recall. "
                 "  Fencing..\n", addr_str);
@@ -626,6 +644,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
                 container_of(cb, struct nfs4_layout_stateid, ls_recall);
         struct nfsd_net *nn;
         ktime_t now, cutoff;
+       const struct nfsd4_layout_ops *ops;
         LIST_HEAD(reaplist);
  
  
@@ -661,7 +680,13 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
                 /*
                  * Unknown error or non-responding client, we'll need to fence.
                  */
-               nfsd4_cb_layout_fail(ls);
+               trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
+
+               ops = nfsd4_layout_ops[ls->ls_layout_type];
+               if (ops->fence_client)
+                       ops->fence_client(ls);
+               else
+                       nfsd4_cb_layout_fail(ls);
                 return -1;
         }
  }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c

index 40b9124..de1ff1d 100644 (file)
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1268,8 +1268,10 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
                 goto out;
  
         nfserr = nfs_ok;
-       if (gdp->gd_maxcount != 0)
-               nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
+       if (gdp->gd_maxcount != 0) {
+               nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
+                                       cstate->session->se_client, gdp);
+       }
  
         gdp->gd_notify_types &= ops->notify_types;
  out:
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c

index aa87954..9df898b 100644 (file)
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3365,6 +3365,7 @@ static __be32 nfsd4_encode_splice_read(
         struct xdr_stream *xdr = &resp->xdr;
         struct xdr_buf *buf = xdr->buf;
         u32 eof;
+       long len;
         int space_left;
         __be32 nfserr;
         __be32 *p = xdr->p - 2;
@@ -3373,6 +3374,7 @@ static __be32 nfsd4_encode_splice_read(
         if (xdr->end - xdr->p < 1)
                 return nfserr_resource;
  
+       len = maxcount;
         nfserr = nfsd_splice_read(read->rd_rqstp, file,
                                   read->rd_offset, &maxcount);
         if (nfserr) {
@@ -3385,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
                 return nfserr;
         }
  
-       eof = (read->rd_offset + maxcount >=
-              d_inode(read->rd_fhp->fh_dentry)->i_size);
+       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+                               d_inode(read->rd_fhp->fh_dentry)->i_size);
  
         *(p++) = htonl(eof);
         *(p++) = htonl(maxcount);
@@ -3456,14 +3458,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
         }
         read->rd_vlen = v;
  
+       len = maxcount;
         nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
                         read->rd_vlen, &maxcount);
         if (nfserr)
                 return nfserr;
         xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
  
-       eof = (read->rd_offset + maxcount >=
-              d_inode(read->rd_fhp->fh_dentry)->i_size);
+       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+                               d_inode(read->rd_fhp->fh_dentry)->i_size);
  
         tmp = htonl(eof);
         write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h

index d4c4453..7d073b9 100644 (file)
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -21,6 +21,7 @@ struct nfsd4_layout_ops {
         u32             notify_types;
  
         __be32 (*proc_getdeviceinfo)(struct super_block *sb,
+                       struct nfs4_client *clp,
                         struct nfsd4_getdeviceinfo *gdevp);
         __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
                         struct nfsd4_getdeviceinfo *gdevp);
@@ -32,10 +33,17 @@ struct nfsd4_layout_ops {
  
         __be32 (*proc_layoutcommit)(struct inode *inode,
                         struct nfsd4_layoutcommit *lcp);
+
+       void (*fence_client)(struct nfs4_layout_stateid *ls);
  };
  
  extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
  extern const struct nfsd4_layout_ops bl_layout_ops;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+extern const struct nfsd4_layout_ops scsi_layout_ops;
+#endif
  
  __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
                 struct nfsd4_compound_state *cstate, stateid_t *stateid,
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h

index c11ba31..2d573ec 100644 (file)
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -139,4 +139,23 @@ static inline int nfsd_create_is_exclusive(int createmode)
                || createmode == NFS4_CREATE_EXCLUSIVE4_1;
  }
  
+/*
+ * Decide whether the reply to a read should have eof set.
+ *
+ * @requested: number of bytes that were asked for
+ * @read:      number of bytes actually read
+ * @offset:    file offset the read started at
+ * @size:      file size to compare the end of the read against
+ */
+static inline bool nfsd_eof_on_read(long requested, long read,
+                               loff_t offset, loff_t size)
+{
+       /*
+        * Two cases report eof:
+        *
+        *  - a short read, which we assume means eof;
+        *  - a non-short read that nevertheless reaches end of file; the
+        *    spec still requires us to set eof in that case.
+        *
+        * Further operations may have modified the file size since the
+        * read, so the size comparison is not atomic with the read.
+        * We've only seen that cause a problem for a client in the case
+        * where the read returned a count of 0 without setting eof.
+        * That case was fixed by the addition of the short read test.
+        */
+       return read < requested || offset + read >= size;
+}
+
  #endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile

index f646391..3542d94 100644 (file)
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -121,4 +121,5 @@ xfs-$(CONFIG_XFS_RT)                += xfs_rtalloc.o
  xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
  xfs-$(CONFIG_SYSCTL)           += xfs_sysctl.o
  xfs-$(CONFIG_COMPAT)           += xfs_ioctl32.o
-xfs-$(CONFIG_NFSD_PNFS)                += xfs_pnfs.o
+xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o
+xfs-$(CONFIG_NFSD_SCSILAYOUT)  += xfs_pnfs.o
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c

index 2816d42..a1b2dd8 100644 (file)
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -246,7 +246,7 @@ const struct export_operations xfs_export_operations = {
         .fh_to_parent           = xfs_fs_fh_to_parent,
         .get_parent             = xfs_fs_get_parent,
         .commit_metadata        = xfs_fs_nfs_commit_metadata,
-#ifdef CONFIG_NFSD_PNFS
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
         .get_uuid               = xfs_fs_get_uuid,
         .map_blocks             = xfs_fs_map_blocks,
         .commit_blocks          = xfs_fs_commit_blocks,
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h

index 8147ac1..93f7485 100644 (file)
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h
@@ -1,7 +1,7 @@
  #ifndef _XFS_PNFS_H
  #define _XFS_PNFS_H 1
  
-#ifdef CONFIG_NFSD_PNFS
+#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
  int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
  int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
                 struct iomap *iomap, bool write, u32 *device_generation);
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h

index d6f9b4e..0114334 100644 (file)
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -529,6 +529,7 @@ enum pnfs_layouttype {
         LAYOUT_OSD2_OBJECTS = 2,
         LAYOUT_BLOCK_VOLUME = 3,
         LAYOUT_FLEX_FILES = 4,
+       LAYOUT_SCSI = 5,
         LAYOUT_TYPE_MAX
  };
  
@@ -555,6 +556,7 @@ enum pnfs_block_volume_type {
         PNFS_BLOCK_VOLUME_SLICE         = 1,
         PNFS_BLOCK_VOLUME_CONCAT        = 2,
         PNFS_BLOCK_VOLUME_STRIPE        = 3,
+       PNFS_BLOCK_VOLUME_SCSI          = 4,
  };
  
  enum pnfs_block_extent_state {
@@ -568,6 +570,23 @@ enum pnfs_block_extent_state {
  #define PNFS_BLOCK_EXTENT_SIZE \
         (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE)
  
+/*
+ * On the wire size of a scsi commit range: four 32-bit XDR words.
+ * nfsd4_scsi_decode_layoutupdate() reads each range as two 64-bit
+ * values, an offset followed by a length.
+ */
+#define PNFS_SCSI_RANGE_SIZE \
+       (4 * sizeof(__be32))
+
+/*
+ * Code set of a SCSI volume's designator.  Stored in the scsi member of
+ * struct pnfs_block_volume and encoded as a 32-bit value by
+ * nfsd4_block_encode_volume().
+ * NOTE(review): the numeric values presumably follow the pNFS SCSI layout
+ * specification -- confirm before changing them.
+ */
+enum scsi_code_set {
+       PS_CODE_SET_BINARY      = 1,
+       PS_CODE_SET_ASCII       = 2,
+       PS_CODE_SET_UTF8        = 3
+};
+
+/*
+ * Type of a SCSI volume's designator.  Stored in the scsi member of
+ * struct pnfs_block_volume and encoded as a 32-bit value by
+ * nfsd4_block_encode_volume().
+ * NOTE(review): the numeric values (note the gap between 3 and 8)
+ * presumably follow the pNFS SCSI layout specification -- confirm before
+ * changing them.
+ */
+enum scsi_designator_type {
+       PS_DESIGNATOR_T10       = 1,
+       PS_DESIGNATOR_EUI64     = 2,
+       PS_DESIGNATOR_NAA       = 3,
+       PS_DESIGNATOR_NAME      = 8
+};
+
  #define NFL4_UFLG_MASK                 0x0000003F
  #define NFL4_UFLG_DENSE                        0x00000001
  #define NFL4_UFLG_COMMIT_THRU_MDS      0x00000002
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 25 Mar 2016 02:50:32 +0000 (19:50 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 25 Mar 2016 02:50:32 +0000 (19:50 -0700)
Documentation/filesystems/nfs/pnfs-scsi-server.txt	[new file with mode: 0644]	patch \| blob
fs/nfs/blocklayout/blocklayout.c		patch \| blob \| history
fs/nfs/blocklayout/blocklayout.h		patch \| blob \| history
fs/nfs/blocklayout/dev.c		patch \| blob \| history
fs/nfs/blocklayout/extent_tree.c		patch \| blob \| history
fs/nfs/blocklayout/rpc_pipefs.c		patch \| blob \| history
fs/nfsd/Kconfig		patch \| blob \| history
fs/nfsd/Makefile		patch \| blob \| history
fs/nfsd/blocklayout.c		patch \| blob \| history
fs/nfsd/blocklayoutxdr.c		patch \| blob \| history
fs/nfsd/blocklayoutxdr.h		patch \| blob \| history
fs/nfsd/nfs3proc.c		patch \| blob \| history
fs/nfsd/nfs4layouts.c		patch \| blob \| history
fs/nfsd/nfs4proc.c		patch \| blob \| history
fs/nfsd/nfs4xdr.c		patch \| blob \| history
fs/nfsd/pnfs.h		patch \| blob \| history
fs/nfsd/vfs.h		patch \| blob \| history
fs/xfs/Makefile		patch \| blob \| history
fs/xfs/xfs_export.c		patch \| blob \| history
fs/xfs/xfs_pnfs.h		patch \| blob \| history
include/linux/nfs4.h		patch \| blob \| history