Merge tag 'nfsd-4.6-1' of git://linux-nfs.org/~bfields/linux
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2016 02:50:32 +0000 (19:50 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 25 Mar 2016 02:50:32 +0000 (19:50 -0700)
Pull more nfsd updates from Bruce Fields:
 "Apologies for the previous request, which omitted the top 8 commits
  from my for-next branch (including the SCSI layout commits).  Thanks
  to Trond for spotting my error!"

This actually includes the new layout types, so here's that part of
the pull message repeated:

 "Support for a new pnfs layout type from Christoph Hellwig.  The new
  layout type is a variant of the block layout which uses SCSI features
  to offer improved fencing and device identification.

  Note this pull request also includes the client side of SCSI layout,
  with Trond's permission"

* tag 'nfsd-4.6-1' of git://linux-nfs.org/~bfields/linux:
  nfsd: use short read as well as i_size to set eof
  nfsd: better layoutupdate bounds-checking
  nfsd: block and scsi layout drivers need to depend on CONFIG_BLOCK
  nfsd: add SCSI layout support
  nfsd: move some blocklayout code
  nfsd: add a new config option for the block layout driver
  nfs/blocklayout: add SCSI layout support
  nfs4.h: add SCSI layout definitions

21 files changed:
Documentation/filesystems/nfs/pnfs-scsi-server.txt [new file with mode: 0644]
fs/nfs/blocklayout/blocklayout.c
fs/nfs/blocklayout/blocklayout.h
fs/nfs/blocklayout/dev.c
fs/nfs/blocklayout/extent_tree.c
fs/nfs/blocklayout/rpc_pipefs.c
fs/nfsd/Kconfig
fs/nfsd/Makefile
fs/nfsd/blocklayout.c
fs/nfsd/blocklayoutxdr.c
fs/nfsd/blocklayoutxdr.h
fs/nfsd/nfs3proc.c
fs/nfsd/nfs4layouts.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4xdr.c
fs/nfsd/pnfs.h
fs/nfsd/vfs.h
fs/xfs/Makefile
fs/xfs/xfs_export.c
fs/xfs/xfs_pnfs.h
include/linux/nfs4.h

diff --git a/Documentation/filesystems/nfs/pnfs-scsi-server.txt b/Documentation/filesystems/nfs/pnfs-scsi-server.txt
new file mode 100644 (file)
index 0000000..5bef726
--- /dev/null
@@ -0,0 +1,23 @@
+
+pNFS SCSI layout server user guide
+==================================
+
+This document describes support for pNFS SCSI layouts in the Linux NFS server.
+With pNFS SCSI layouts, the NFS server acts as Metadata Server (MDS) for pNFS,
+which in addition to handling all the metadata access to the NFS export,
+also hands out layouts to the clients so that they can directly access the
+underlying SCSI LUNs that are shared with the client.
+
+To use pNFS SCSI layouts with with the Linux NFS server, the exported file
+system needs to support the pNFS SCSI layouts (currently just XFS), and the
+file system must sit on a SCSI LUN that is accessible to the clients in
+addition to the MDS.  As of now the file system needs to sit directly on the
+exported LUN, striping or concatenation of LUNs on the MDS and clients
+is not supported yet.
+
+On a server built with CONFIG_NFSD_SCSI, the pNFS SCSI volume support is
+automatically enabled if the file system is exported using the "pnfs"
+option and the underlying SCSI device support persistent reservations.
+On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
+enabled, and the file system is mounted using the NFSv4.1 protocol
+version (mount -o vers=4.1).
index 8bc870e..02e4d87 100644 (file)
@@ -446,8 +446,8 @@ static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
        kfree(bl);
 }
 
-static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
-                                                  gfp_t gfp_flags)
+static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
+               gfp_t gfp_flags, bool is_scsi_layout)
 {
        struct pnfs_block_layout *bl;
 
@@ -460,9 +460,22 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
        bl->bl_ext_ro = RB_ROOT;
        spin_lock_init(&bl->bl_ext_lock);
 
+       bl->bl_scsi_layout = is_scsi_layout;
        return &bl->bl_layout;
 }
 
+static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
+                                                  gfp_t gfp_flags)
+{
+       return __bl_alloc_layout_hdr(inode, gfp_flags, false);
+}
+
+static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
+                                                  gfp_t gfp_flags)
+{
+       return __bl_alloc_layout_hdr(inode, gfp_flags, true);
+}
+
 static void bl_free_lseg(struct pnfs_layout_segment *lseg)
 {
        dprintk("%s enter\n", __func__);
@@ -889,22 +902,53 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
        .sync                           = pnfs_generic_sync,
 };
 
+static struct pnfs_layoutdriver_type scsilayout_type = {
+       .id                             = LAYOUT_SCSI,
+       .name                           = "LAYOUT_SCSI",
+       .owner                          = THIS_MODULE,
+       .flags                          = PNFS_LAYOUTRET_ON_SETATTR |
+                                         PNFS_READ_WHOLE_PAGE,
+       .read_pagelist                  = bl_read_pagelist,
+       .write_pagelist                 = bl_write_pagelist,
+       .alloc_layout_hdr               = sl_alloc_layout_hdr,
+       .free_layout_hdr                = bl_free_layout_hdr,
+       .alloc_lseg                     = bl_alloc_lseg,
+       .free_lseg                      = bl_free_lseg,
+       .return_range                   = bl_return_range,
+       .prepare_layoutcommit           = bl_prepare_layoutcommit,
+       .cleanup_layoutcommit           = bl_cleanup_layoutcommit,
+       .set_layoutdriver               = bl_set_layoutdriver,
+       .alloc_deviceid_node            = bl_alloc_deviceid_node,
+       .free_deviceid_node             = bl_free_deviceid_node,
+       .pg_read_ops                    = &bl_pg_read_ops,
+       .pg_write_ops                   = &bl_pg_write_ops,
+       .sync                           = pnfs_generic_sync,
+};
+
+
 static int __init nfs4blocklayout_init(void)
 {
        int ret;
 
        dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
 
-       ret = pnfs_register_layoutdriver(&blocklayout_type);
+       ret = bl_init_pipefs();
        if (ret)
                goto out;
-       ret = bl_init_pipefs();
+
+       ret = pnfs_register_layoutdriver(&blocklayout_type);
        if (ret)
-               goto out_unregister;
+               goto out_cleanup_pipe;
+
+       ret = pnfs_register_layoutdriver(&scsilayout_type);
+       if (ret)
+               goto out_unregister_block;
        return 0;
 
-out_unregister:
+out_unregister_block:
        pnfs_unregister_layoutdriver(&blocklayout_type);
+out_cleanup_pipe:
+       bl_cleanup_pipefs();
 out:
        return ret;
 }
@@ -914,8 +958,9 @@ static void __exit nfs4blocklayout_exit(void)
        dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
               __func__);
 
-       bl_cleanup_pipefs();
+       pnfs_unregister_layoutdriver(&scsilayout_type);
        pnfs_unregister_layoutdriver(&blocklayout_type);
+       bl_cleanup_pipefs();
 }
 
 MODULE_ALIAS("nfs-layouttype4-3");
index c556640..bc21205 100644 (file)
@@ -55,7 +55,6 @@ struct pnfs_block_dev;
  */
 #define PNFS_BLOCK_UUID_LEN    128
 
-
 struct pnfs_block_volume {
        enum pnfs_block_volume_type     type;
        union {
@@ -82,6 +81,13 @@ struct pnfs_block_volume {
                        u32             volumes_count;
                        u32             volumes[PNFS_BLOCK_MAX_DEVICES];
                } stripe;
+               struct {
+                       enum scsi_code_set              code_set;
+                       enum scsi_designator_type       designator_type;
+                       int                             designator_len;
+                       u8                              designator[256];
+                       u64                             pr_key;
+               } scsi;
        };
 };
 
@@ -106,6 +112,9 @@ struct pnfs_block_dev {
        struct block_device             *bdev;
        u64                             disk_offset;
 
+       u64                             pr_key;
+       bool                            pr_registered;
+
        bool (*map)(struct pnfs_block_dev *dev, u64 offset,
                        struct pnfs_block_dev_map *map);
 };
@@ -131,6 +140,7 @@ struct pnfs_block_layout {
        struct rb_root          bl_ext_rw;
        struct rb_root          bl_ext_ro;
        spinlock_t              bl_ext_lock;   /* Protects list manipulation */
+       bool                    bl_scsi_layout;
 };
 
 static inline struct pnfs_block_layout *
@@ -182,6 +192,6 @@ void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
 dev_t bl_resolve_deviceid(struct nfs_server *server,
                struct pnfs_block_volume *b, gfp_t gfp_mask);
 int __init bl_init_pipefs(void);
-void __exit bl_cleanup_pipefs(void);
+void bl_cleanup_pipefs(void);
 
 #endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
index a861bbd..e5b8967 100644 (file)
@@ -1,11 +1,12 @@
 /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
  */
 #include <linux/sunrpc/svc.h>
 #include <linux/blkdev.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_xdr.h>
+#include <linux/pr.h>
 
 #include "blocklayout.h"
 
@@ -21,6 +22,17 @@ bl_free_device(struct pnfs_block_dev *dev)
                        bl_free_device(&dev->children[i]);
                kfree(dev->children);
        } else {
+               if (dev->pr_registered) {
+                       const struct pr_ops *ops =
+                               dev->bdev->bd_disk->fops->pr_ops;
+                       int error;
+
+                       error = ops->pr_register(dev->bdev, dev->pr_key, 0,
+                               false);
+                       if (error)
+                               pr_err("failed to unregister PR key.\n");
+               }
+
                if (dev->bdev)
                        blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
        }
@@ -113,6 +125,24 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
                for (i = 0; i < b->stripe.volumes_count; i++)
                        b->stripe.volumes[i] = be32_to_cpup(p++);
                break;
+       case PNFS_BLOCK_VOLUME_SCSI:
+               p = xdr_inline_decode(xdr, 4 + 4 + 4);
+               if (!p)
+                       return -EIO;
+               b->scsi.code_set = be32_to_cpup(p++);
+               b->scsi.designator_type = be32_to_cpup(p++);
+               b->scsi.designator_len = be32_to_cpup(p++);
+               p = xdr_inline_decode(xdr, b->scsi.designator_len);
+               if (!p)
+                       return -EIO;
+               if (b->scsi.designator_len > 256)
+                       return -EIO;
+               memcpy(&b->scsi.designator, p, b->scsi.designator_len);
+               p = xdr_inline_decode(xdr, 8);
+               if (!p)
+                       return -EIO;
+               p = xdr_decode_hyper(p, &b->scsi.pr_key);
+               break;
        default:
                dprintk("unknown volume type!\n");
                return -EIO;
@@ -216,6 +246,116 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
        return 0;
 }
 
+static bool
+bl_validate_designator(struct pnfs_block_volume *v)
+{
+       switch (v->scsi.designator_type) {
+       case PS_DESIGNATOR_EUI64:
+               if (v->scsi.code_set != PS_CODE_SET_BINARY)
+                       return false;
+
+               if (v->scsi.designator_len != 8 &&
+                   v->scsi.designator_len != 10 &&
+                   v->scsi.designator_len != 16)
+                       return false;
+
+               return true;
+       case PS_DESIGNATOR_NAA:
+               if (v->scsi.code_set != PS_CODE_SET_BINARY)
+                       return false;
+
+               if (v->scsi.designator_len != 8 &&
+                   v->scsi.designator_len != 16)
+                       return false;
+
+               return true;
+       case PS_DESIGNATOR_T10:
+       case PS_DESIGNATOR_NAME:
+               pr_err("pNFS: unsupported designator "
+                       "(code set %d, type %d, len %d.\n",
+                       v->scsi.code_set,
+                       v->scsi.designator_type,
+                       v->scsi.designator_len);
+               return false;
+       default:
+               pr_err("pNFS: invalid designator "
+                       "(code set %d, type %d, len %d.\n",
+                       v->scsi.code_set,
+                       v->scsi.designator_type,
+                       v->scsi.designator_len);
+               return false;
+       }
+}
+
+static int
+bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
+               struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+       struct pnfs_block_volume *v = &volumes[idx];
+       const struct pr_ops *ops;
+       const char *devname;
+       int error;
+
+       if (!bl_validate_designator(v))
+               return -EINVAL;
+
+       switch (v->scsi.designator_len) {
+       case 8:
+               devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
+                               v->scsi.designator);
+               break;
+       case 12:
+               devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
+                               v->scsi.designator);
+               break;
+       case 16:
+               devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
+                               v->scsi.designator);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
+       if (IS_ERR(d->bdev)) {
+               pr_warn("pNFS: failed to open device %s (%ld)\n",
+                       devname, PTR_ERR(d->bdev));
+               kfree(devname);
+               return PTR_ERR(d->bdev);
+       }
+
+       kfree(devname);
+
+       d->len = i_size_read(d->bdev->bd_inode);
+       d->map = bl_map_simple;
+       d->pr_key = v->scsi.pr_key;
+
+       pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
+               d->bdev->bd_disk->disk_name, d->pr_key);
+
+       ops = d->bdev->bd_disk->fops->pr_ops;
+       if (!ops) {
+               pr_err("pNFS: block device %s does not support reservations.",
+                               d->bdev->bd_disk->disk_name);
+               error = -EINVAL;
+               goto out_blkdev_put;
+       }
+
+       error = ops->pr_register(d->bdev, 0, d->pr_key, true);
+       if (error) {
+               pr_err("pNFS: failed to register key for block device %s.",
+                               d->bdev->bd_disk->disk_name);
+               goto out_blkdev_put;
+       }
+
+       d->pr_registered = true;
+       return 0;
+
+out_blkdev_put:
+       blkdev_put(d->bdev, FMODE_READ);
+       return error;
+}
+
 static int
 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
                struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -303,6 +443,8 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
                return bl_parse_concat(server, d, volumes, idx, gfp_mask);
        case PNFS_BLOCK_VOLUME_STRIPE:
                return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
+       case PNFS_BLOCK_VOLUME_SCSI:
+               return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
        default:
                dprintk("unsupported volume type: %d\n", volumes[idx].type);
                return -EIO;
index 35ab51c..720b3ff 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
  */
 
 #include <linux/vmalloc.h>
@@ -462,10 +462,12 @@ out:
        return err;
 }
 
-static size_t ext_tree_layoutupdate_size(size_t count)
+static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
 {
-       return sizeof(__be32) /* number of entries */ +
-               PNFS_BLOCK_EXTENT_SIZE * count;
+       if (bl->bl_scsi_layout)
+               return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
+       else
+               return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
 }
 
 static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
@@ -483,6 +485,23 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
        }
 }
 
+static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
+{
+       p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
+                       NFS4_DEVICEID4_SIZE);
+       p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+       p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+       p = xdr_encode_hyper(p, 0LL);
+       *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
+       return p;
+}
+
+static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
+{
+       p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+       return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+}
+
 static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
                size_t buffer_size, size_t *count)
 {
@@ -496,19 +515,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
                        continue;
 
                (*count)++;
-               if (ext_tree_layoutupdate_size(*count) > buffer_size) {
+               if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
                        /* keep counting.. */
                        ret = -ENOSPC;
                        continue;
                }
 
-               p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
-                               NFS4_DEVICEID4_SIZE);
-               p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
-               p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
-               p = xdr_encode_hyper(p, 0LL);
-               *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
-
+               if (bl->bl_scsi_layout)
+                       p = encode_scsi_range(be, p);
+               else
+                       p = encode_block_extent(be, p);
                be->be_tag = EXTENT_COMMITTING;
        }
        spin_unlock(&bl->bl_ext_lock);
@@ -537,7 +553,7 @@ retry:
        if (unlikely(ret)) {
                ext_tree_free_commitdata(arg, buffer_size);
 
-               buffer_size = ext_tree_layoutupdate_size(count);
+               buffer_size = ext_tree_layoutupdate_size(bl, count);
                count = 0;
 
                arg->layoutupdate_pages =
@@ -556,7 +572,7 @@ retry:
        }
 
        *start_p = cpu_to_be32(count);
-       arg->layoutupdate_len = ext_tree_layoutupdate_size(count);
+       arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
 
        if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
                void *p = start_p, *end = p + arg->layoutupdate_len;
index dbe5839..9fb067a 100644 (file)
@@ -281,7 +281,7 @@ out:
        return ret;
 }
 
-void __exit bl_cleanup_pipefs(void)
+void bl_cleanup_pipefs(void)
 {
        rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
        unregister_pernet_subsys(&nfs4blocklayout_net_ops);
index a0b77fc..c9f583d 100644 (file)
@@ -84,12 +84,30 @@ config NFSD_V4
          If unsure, say N.
 
 config NFSD_PNFS
-       bool "NFSv4.1 server support for Parallel NFS (pNFS)"
-       depends on NFSD_V4
+       bool
+
+config NFSD_BLOCKLAYOUT
+       bool "NFSv4.1 server support for pNFS block layouts"
+       depends on NFSD_V4 && BLOCK
+       select NFSD_PNFS
+       help
+         This option enables support for the exporting pNFS block layouts
+         in the kernel's NFS server. The pNFS block layout enables NFS
+         clients to directly perform I/O to block devices accesible to both
+         the server and the clients.  See RFC 5663 for more details.
+
+         If unsure, say N.
+
+config NFSD_SCSILAYOUT
+       bool "NFSv4.1 server support for pNFS SCSI layouts"
+       depends on NFSD_V4 && BLOCK
+       select NFSD_PNFS
        help
-         This option enables support for the parallel NFS features of the
-         minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
-         server.
+         This option enables support for the exporting pNFS SCSI layouts
+         in the kernel's NFS server. The pNFS SCSI layout enables NFS
+         clients to directly perform I/O to SCSI devices accesible to both
+         the server and the clients.  See draft-ietf-nfsv4-scsi-layout for
+         more details.
 
          If unsure, say N.
 
index 9a6028e..3ae5f3c 100644 (file)
@@ -17,4 +17,6 @@ nfsd-$(CONFIG_NFSD_V3)        += nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
                           nfs4acl.o nfs4callback.o nfs4recover.o
-nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
+nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
index c29d942..e55b524 100644 (file)
@@ -1,11 +1,14 @@
 /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
  */
 #include <linux/exportfs.h>
 #include <linux/genhd.h>
 #include <linux/slab.h>
+#include <linux/pr.h>
 
 #include <linux/nfsd/debug.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_common.h>
 
 #include "blocklayoutxdr.h"
 #include "pnfs.h"
 #define NFSDDBG_FACILITY       NFSDDBG_PNFS
 
 
-static int
-nfsd4_block_get_device_info_simple(struct super_block *sb,
-               struct nfsd4_getdeviceinfo *gdp)
-{
-       struct pnfs_block_deviceaddr *dev;
-       struct pnfs_block_volume *b;
-
-       dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
-                     sizeof(struct pnfs_block_volume), GFP_KERNEL);
-       if (!dev)
-               return -ENOMEM;
-       gdp->gd_device = dev;
-
-       dev->nr_volumes = 1;
-       b = &dev->volumes[0];
-
-       b->type = PNFS_BLOCK_VOLUME_SIMPLE;
-       b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
-       return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
-                       &b->simple.offset);
-}
-
-static __be32
-nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
-               struct nfsd4_getdeviceinfo *gdp)
-{
-       if (sb->s_bdev != sb->s_bdev->bd_contains)
-               return nfserr_inval;
-       return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
-}
-
 static __be32
 nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
                struct nfsd4_layoutget *args)
@@ -141,20 +113,13 @@ out_layoutunavailable:
 }
 
 static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
-               struct nfsd4_layoutcommit *lcp)
+nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
+               struct iomap *iomaps, int nr_iomaps)
 {
        loff_t new_size = lcp->lc_last_wr + 1;
        struct iattr iattr = { .ia_valid = 0 };
-       struct iomap *iomaps;
-       int nr_iomaps;
        int error;
 
-       nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
-                       lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
-       if (nr_iomaps < 0)
-               return nfserrno(nr_iomaps);
-
        if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
            timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
                lcp->lc_mtime = current_fs_time(inode->i_sb);
@@ -172,6 +137,54 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
        return nfserrno(error);
 }
 
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
+static int
+nfsd4_block_get_device_info_simple(struct super_block *sb,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       struct pnfs_block_deviceaddr *dev;
+       struct pnfs_block_volume *b;
+
+       dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+                     sizeof(struct pnfs_block_volume), GFP_KERNEL);
+       if (!dev)
+               return -ENOMEM;
+       gdp->gd_device = dev;
+
+       dev->nr_volumes = 1;
+       b = &dev->volumes[0];
+
+       b->type = PNFS_BLOCK_VOLUME_SIMPLE;
+       b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
+       return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
+                       &b->simple.offset);
+}
+
+static __be32
+nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
+               struct nfs4_client *clp,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       if (sb->s_bdev != sb->s_bdev->bd_contains)
+               return nfserr_inval;
+       return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
+}
+
+static __be32
+nfsd4_block_proc_layoutcommit(struct inode *inode,
+               struct nfsd4_layoutcommit *lcp)
+{
+       struct iomap *iomaps;
+       int nr_iomaps;
+
+       nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+                       lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+       if (nr_iomaps < 0)
+               return nfserrno(nr_iomaps);
+
+       return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
 const struct nfsd4_layout_ops bl_layout_ops = {
        /*
         * Pretend that we send notification to the client.  This is a blatant
@@ -190,3 +203,206 @@ const struct nfsd4_layout_ops bl_layout_ops = {
        .encode_layoutget       = nfsd4_block_encode_layoutget,
        .proc_layoutcommit      = nfsd4_block_proc_layoutcommit,
 };
+#endif /* CONFIG_NFSD_BLOCKLAYOUT */
+
+#ifdef CONFIG_NFSD_SCSILAYOUT
+static int nfsd4_scsi_identify_device(struct block_device *bdev,
+               struct pnfs_block_volume *b)
+{
+       struct request_queue *q = bdev->bd_disk->queue;
+       struct request *rq;
+       size_t bufflen = 252, len, id_len;
+       u8 *buf, *d, type, assoc;
+       int error;
+
+       buf = kzalloc(bufflen, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       rq = blk_get_request(q, READ, GFP_KERNEL);
+       if (IS_ERR(rq)) {
+               error = -ENOMEM;
+               goto out_free_buf;
+       }
+       blk_rq_set_block_pc(rq);
+
+       error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
+       if (error)
+               goto out_put_request;
+
+       rq->cmd[0] = INQUIRY;
+       rq->cmd[1] = 1;
+       rq->cmd[2] = 0x83;
+       rq->cmd[3] = bufflen >> 8;
+       rq->cmd[4] = bufflen & 0xff;
+       rq->cmd_len = COMMAND_SIZE(INQUIRY);
+
+       error = blk_execute_rq(rq->q, NULL, rq, 1);
+       if (error) {
+               pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
+                       rq->errors);
+               goto out_put_request;
+       }
+
+       len = (buf[2] << 8) + buf[3] + 4;
+       if (len > bufflen) {
+               pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
+                       len);
+               goto out_put_request;
+       }
+
+       d = buf + 4;
+       for (d = buf + 4; d < buf + len; d += id_len + 4) {
+               id_len = d[3];
+               type = d[1] & 0xf;
+               assoc = (d[1] >> 4) & 0x3;
+
+               /*
+                * We only care about a EUI-64 and NAA designator types
+                * with LU association.
+                */
+               if (assoc != 0x00)
+                       continue;
+               if (type != 0x02 && type != 0x03)
+                       continue;
+               if (id_len != 8 && id_len != 12 && id_len != 16)
+                       continue;
+
+               b->scsi.code_set = PS_CODE_SET_BINARY;
+               b->scsi.designator_type = type == 0x02 ?
+                       PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
+               b->scsi.designator_len = id_len;
+               memcpy(b->scsi.designator, d + 4, id_len);
+
+               /*
+                * If we found a 8 or 12 byte descriptor continue on to
+                * see if a 16 byte one is available.  If we find a
+                * 16 byte descriptor we're done.
+                */
+               if (id_len == 16)
+                       break;
+       }
+
+out_put_request:
+       blk_put_request(rq);
+out_free_buf:
+       kfree(buf);
+       return error;
+}
+
+#define NFSD_MDS_PR_KEY                0x0100000000000000
+
+/*
+ * We use the client ID as a unique key for the reservations.
+ * This allows us to easily fence a client when recalls fail.
+ */
+static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
+{
+       return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
+}
+
+static int
+nfsd4_block_get_device_info_scsi(struct super_block *sb,
+               struct nfs4_client *clp,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       struct pnfs_block_deviceaddr *dev;
+       struct pnfs_block_volume *b;
+       const struct pr_ops *ops;
+       int error;
+
+       dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+                     sizeof(struct pnfs_block_volume), GFP_KERNEL);
+       if (!dev)
+               return -ENOMEM;
+       gdp->gd_device = dev;
+
+       dev->nr_volumes = 1;
+       b = &dev->volumes[0];
+
+       b->type = PNFS_BLOCK_VOLUME_SCSI;
+       b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
+
+       error = nfsd4_scsi_identify_device(sb->s_bdev, b);
+       if (error)
+               return error;
+
+       ops = sb->s_bdev->bd_disk->fops->pr_ops;
+       if (!ops) {
+               pr_err("pNFS: device %s does not support PRs.\n",
+                       sb->s_id);
+               return -EINVAL;
+       }
+
+       error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
+       if (error) {
+               pr_err("pNFS: failed to register key for device %s.\n",
+                       sb->s_id);
+               return -EINVAL;
+       }
+
+       error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
+                       PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
+       if (error) {
+               pr_err("pNFS: failed to reserve device %s.\n",
+                       sb->s_id);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static __be32
+nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
+               struct nfs4_client *clp,
+               struct nfsd4_getdeviceinfo *gdp)
+{
+       if (sb->s_bdev != sb->s_bdev->bd_contains)
+               return nfserr_inval;
+       return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
+}
+static __be32
+nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+               struct nfsd4_layoutcommit *lcp)
+{
+       struct iomap *iomaps;
+       int nr_iomaps;
+
+       nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+                       lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+       if (nr_iomaps < 0)
+               return nfserrno(nr_iomaps);
+
+       return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
+static void
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+{
+       struct nfs4_client *clp = ls->ls_stid.sc_client;
+       struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+
+       bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
+                       nfsd4_scsi_pr_key(clp), 0, true);
+}
+
+const struct nfsd4_layout_ops scsi_layout_ops = {
+       /*
+        * Pretend that we send notification to the client.  This is a blatant
+        * lie to force recent Linux clients to cache our device IDs.
+        * We rarely ever change the device ID, so the harm of leaking deviceids
+        * for a while isn't too bad.  Unfortunately RFC5661 is a complete mess
+        * in this regard, but I filed errata 4119 for this a while ago, and
+        * hopefully the Linux client will eventually start caching deviceids
+        * without this again.
+        */
+       .notify_types           =
+                       NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
+       .proc_getdeviceinfo     = nfsd4_scsi_proc_getdeviceinfo,
+       .encode_getdeviceinfo   = nfsd4_block_encode_getdeviceinfo,
+       .proc_layoutget         = nfsd4_block_proc_layoutget,
+       .encode_layoutget       = nfsd4_block_encode_layoutget,
+       .proc_layoutcommit      = nfsd4_scsi_proc_layoutcommit,
+       .fence_client           = nfsd4_scsi_fence_client,
+};
+#endif /* CONFIG_NFSD_SCSILAYOUT */
index 6d834dc..6c3b316 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
  */
 #include <linux/sunrpc/svc.h>
 #include <linux/exportfs.h>
@@ -53,6 +53,18 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
                p = xdr_encode_hyper(p, b->simple.offset);
                p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
                break;
+       case PNFS_BLOCK_VOLUME_SCSI:
+               len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8;
+               p = xdr_reserve_space(xdr, len);
+               if (!p)
+                       return -ETOOSMALL;
+
+               *p++ = cpu_to_be32(b->type);
+               *p++ = cpu_to_be32(b->scsi.code_set);
+               *p++ = cpu_to_be32(b->scsi.designator_type);
+               p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len);
+               p = xdr_encode_hyper(p, b->scsi.pr_key);
+               break;
        default:
                return -ENOTSUPP;
        }
@@ -93,18 +105,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
                u32 block_size)
 {
        struct iomap *iomaps;
-       u32 nr_iomaps, expected, i;
+       u32 nr_iomaps, i;
 
        if (len < sizeof(u32)) {
                dprintk("%s: extent array too small: %u\n", __func__, len);
                return -EINVAL;
        }
+       len -= sizeof(u32);
+       if (len % PNFS_BLOCK_EXTENT_SIZE) {
+               dprintk("%s: extent array invalid: %u\n", __func__, len);
+               return -EINVAL;
+       }
 
        nr_iomaps = be32_to_cpup(p++);
-       expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
-       if (len != expected) {
+       if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
                dprintk("%s: extent array size mismatch: %u/%u\n",
-                       __func__, len, expected);
+                       __func__, len, nr_iomaps);
                return -EINVAL;
        }
 
@@ -155,3 +171,54 @@ fail:
        kfree(iomaps);
        return -EINVAL;
 }
+
+int
+nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+               u32 block_size)
+{
+       struct iomap *iomaps;
+       u32 nr_iomaps, expected, i;
+
+       if (len < sizeof(u32)) {
+               dprintk("%s: extent array too small: %u\n", __func__, len);
+               return -EINVAL;
+       }
+
+       nr_iomaps = be32_to_cpup(p++);
+       expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
+       if (len != expected) {
+               dprintk("%s: extent array size mismatch: %u/%u\n",
+                       __func__, len, expected);
+               return -EINVAL;
+       }
+
+       iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+       if (!iomaps) {
+               dprintk("%s: failed to allocate extent array\n", __func__);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < nr_iomaps; i++) {
+               u64 val;
+
+               p = xdr_decode_hyper(p, &val);
+               if (val & (block_size - 1)) {
+                       dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
+                       goto fail;
+               }
+               iomaps[i].offset = val;
+
+               p = xdr_decode_hyper(p, &val);
+               if (val & (block_size - 1)) {
+                       dprintk("%s: unaligned length 0x%llx\n", __func__, val);
+                       goto fail;
+               }
+               iomaps[i].length = val;
+       }
+
+       *iomapp = iomaps;
+       return nr_iomaps;
+fail:
+       kfree(iomaps);
+       return -EINVAL;
+}
index 6de925f..397bc75 100644 (file)
@@ -15,6 +15,11 @@ struct pnfs_block_extent {
        enum pnfs_block_extent_state    es;
 };
 
+struct pnfs_block_range {
+       u64                             foff;
+       u64                             len;
+};
+
 /*
  * Random upper cap for the uuid length to avoid unbounded allocation.
  * Not actually limited by the protocol.
@@ -29,6 +34,13 @@ struct pnfs_block_volume {
                        u32             sig_len;
                        u8              sig[PNFS_BLOCK_UUID_LEN];
                } simple;
+               struct {
+                       enum scsi_code_set              code_set;
+                       enum scsi_designator_type       designator_type;
+                       int                             designator_len;
+                       u8                              designator[256];
+                       u64                             pr_key;
+               } scsi;
        };
 };
 
@@ -43,5 +55,7 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
                struct nfsd4_layoutget *lgp);
 int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
                u32 block_size);
+int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+               u32 block_size);
 
 #endif /* _NFSD_BLOCKLAYOUTXDR_H */
index 7b755b7..51c3b06 100644 (file)
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
 {
        __be32  nfserr;
        u32     max_blocksize = svc_max_payload(rqstp);
+       unsigned long cnt = min(argp->count, max_blocksize);
 
        dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
                                SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
         * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
         * + 1 (xdr opaque byte count) = 26
         */
-       resp->count = min(argp->count, max_blocksize);
+       resp->count = cnt;
        svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
 
        fh_copy(&resp->fh, &argp->fh);
@@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
                                  &resp->count);
        if (nfserr == 0) {
                struct inode    *inode = d_inode(resp->fh.fh_dentry);
-
-               resp->eof = (argp->offset + resp->count) >= inode->i_size;
+               resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
+                                                       inode->i_size);
        }
 
        RETURN_STATUS(nfserr);
index ce2d010..825c7bc 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2014 Christoph Hellwig.
  */
+#include <linux/blkdev.h>
 #include <linux/kmod.h>
 #include <linux/file.h>
 #include <linux/jhash.h>
@@ -26,7 +27,12 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
 static const struct lock_manager_operations nfsd4_layouts_lm_ops;
 
 const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] =  {
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
        [LAYOUT_BLOCK_VOLUME]   = &bl_layout_ops,
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+       [LAYOUT_SCSI]           = &scsi_layout_ops,
+#endif
 };
 
 /* pNFS device ID to export fsid mapping */
@@ -121,10 +127,24 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
        if (!(exp->ex_flags & NFSEXP_PNFS))
                return;
 
+       /*
+        * Check if the file system supports exporting a block-like layout.
+        * If the block device supports reservations prefer the SCSI layout,
+        * otherwise advertise the block layout.
+        */
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
        if (sb->s_export_op->get_uuid &&
            sb->s_export_op->map_blocks &&
            sb->s_export_op->commit_blocks)
                exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+       /* overwrite block layout selection if needed */
+       if (sb->s_export_op->map_blocks &&
+           sb->s_export_op->commit_blocks &&
+           sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
+               exp->ex_layout_type = LAYOUT_SCSI;
+#endif
 }
 
 static void
@@ -590,8 +610,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
 
        rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
 
-       trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
        printk(KERN_WARNING
                "nfsd: client %s failed to respond to layout recall. "
                "  Fencing..\n", addr_str);
@@ -626,6 +644,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
                container_of(cb, struct nfs4_layout_stateid, ls_recall);
        struct nfsd_net *nn;
        ktime_t now, cutoff;
+       const struct nfsd4_layout_ops *ops;
        LIST_HEAD(reaplist);
 
 
@@ -661,7 +680,13 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
                /*
                 * Unknown error or non-responding client, we'll need to fence.
                 */
-               nfsd4_cb_layout_fail(ls);
+               trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
+
+               ops = nfsd4_layout_ops[ls->ls_layout_type];
+               if (ops->fence_client)
+                       ops->fence_client(ls);
+               else
+                       nfsd4_cb_layout_fail(ls);
                return -1;
        }
 }
index 40b9124..de1ff1d 100644 (file)
@@ -1268,8 +1268,10 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
                goto out;
 
        nfserr = nfs_ok;
-       if (gdp->gd_maxcount != 0)
-               nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
+       if (gdp->gd_maxcount != 0) {
+               nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
+                                       cstate->session->se_client, gdp);
+       }
 
        gdp->gd_notify_types &= ops->notify_types;
 out:
index aa87954..9df898b 100644 (file)
@@ -3365,6 +3365,7 @@ static __be32 nfsd4_encode_splice_read(
        struct xdr_stream *xdr = &resp->xdr;
        struct xdr_buf *buf = xdr->buf;
        u32 eof;
+       long len;
        int space_left;
        __be32 nfserr;
        __be32 *p = xdr->p - 2;
@@ -3373,6 +3374,7 @@ static __be32 nfsd4_encode_splice_read(
        if (xdr->end - xdr->p < 1)
                return nfserr_resource;
 
+       len = maxcount;
        nfserr = nfsd_splice_read(read->rd_rqstp, file,
                                  read->rd_offset, &maxcount);
        if (nfserr) {
@@ -3385,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
                return nfserr;
        }
 
-       eof = (read->rd_offset + maxcount >=
-              d_inode(read->rd_fhp->fh_dentry)->i_size);
+       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+                               d_inode(read->rd_fhp->fh_dentry)->i_size);
 
        *(p++) = htonl(eof);
        *(p++) = htonl(maxcount);
@@ -3456,14 +3458,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
        }
        read->rd_vlen = v;
 
+       len = maxcount;
        nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
                        read->rd_vlen, &maxcount);
        if (nfserr)
                return nfserr;
        xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
-       eof = (read->rd_offset + maxcount >=
-              d_inode(read->rd_fhp->fh_dentry)->i_size);
+       eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+                               d_inode(read->rd_fhp->fh_dentry)->i_size);
 
        tmp = htonl(eof);
        write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
index d4c4453..7d073b9 100644 (file)
@@ -21,6 +21,7 @@ struct nfsd4_layout_ops {
        u32             notify_types;
 
        __be32 (*proc_getdeviceinfo)(struct super_block *sb,
+                       struct nfs4_client *clp,
                        struct nfsd4_getdeviceinfo *gdevp);
        __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
                        struct nfsd4_getdeviceinfo *gdevp);
@@ -32,10 +33,17 @@ struct nfsd4_layout_ops {
 
        __be32 (*proc_layoutcommit)(struct inode *inode,
                        struct nfsd4_layoutcommit *lcp);
+
+       void (*fence_client)(struct nfs4_layout_stateid *ls);
 };
 
 extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
 extern const struct nfsd4_layout_ops bl_layout_ops;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+extern const struct nfsd4_layout_ops scsi_layout_ops;
+#endif
 
 __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *cstate, stateid_t *stateid,
index c11ba31..2d573ec 100644 (file)
@@ -139,4 +139,23 @@ static inline int nfsd_create_is_exclusive(int createmode)
               || createmode == NFS4_CREATE_EXCLUSIVE4_1;
 }
 
+static inline bool nfsd_eof_on_read(long requested, long read,
+                               loff_t offset, loff_t size)
+{
+       /* We assume a short read means eof: */
+       if (requested > read)
+               return true;
+       /*
+        * A non-short read might also reach end of file.  The spec
+        * still requires us to set eof in that case.
+        *
+        * Further operations may have modified the file size since
+        * the read, so the following check is not atomic with the read.
+        * We've only seen that cause a problem for a client in the case
+        * where the read returned a count of 0 without setting eof.
+        * That case was fixed by the addition of the above check.
+        */
+       return (offset + read >= size);
+}
+
 #endif /* LINUX_NFSD_VFS_H */
index f646391..3542d94 100644 (file)
@@ -121,4 +121,5 @@ xfs-$(CONFIG_XFS_RT)                += xfs_rtalloc.o
 xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
 xfs-$(CONFIG_SYSCTL)           += xfs_sysctl.o
 xfs-$(CONFIG_COMPAT)           += xfs_ioctl32.o
-xfs-$(CONFIG_NFSD_PNFS)                += xfs_pnfs.o
+xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o
+xfs-$(CONFIG_NFSD_SCSILAYOUT)  += xfs_pnfs.o
index 2816d42..a1b2dd8 100644 (file)
@@ -246,7 +246,7 @@ const struct export_operations xfs_export_operations = {
        .fh_to_parent           = xfs_fs_fh_to_parent,
        .get_parent             = xfs_fs_get_parent,
        .commit_metadata        = xfs_fs_nfs_commit_metadata,
-#ifdef CONFIG_NFSD_PNFS
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
        .get_uuid               = xfs_fs_get_uuid,
        .map_blocks             = xfs_fs_map_blocks,
        .commit_blocks          = xfs_fs_commit_blocks,
index 8147ac1..93f7485 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _XFS_PNFS_H
 #define _XFS_PNFS_H 1
 
-#ifdef CONFIG_NFSD_PNFS
+#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
 int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
 int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
                struct iomap *iomap, bool write, u32 *device_generation);
index d6f9b4e..0114334 100644 (file)
@@ -529,6 +529,7 @@ enum pnfs_layouttype {
        LAYOUT_OSD2_OBJECTS = 2,
        LAYOUT_BLOCK_VOLUME = 3,
        LAYOUT_FLEX_FILES = 4,
+       LAYOUT_SCSI = 5,
        LAYOUT_TYPE_MAX
 };
 
@@ -555,6 +556,7 @@ enum pnfs_block_volume_type {
        PNFS_BLOCK_VOLUME_SLICE         = 1,
        PNFS_BLOCK_VOLUME_CONCAT        = 2,
        PNFS_BLOCK_VOLUME_STRIPE        = 3,
+       PNFS_BLOCK_VOLUME_SCSI          = 4,
 };
 
 enum pnfs_block_extent_state {
@@ -568,6 +570,23 @@ enum pnfs_block_extent_state {
 #define PNFS_BLOCK_EXTENT_SIZE \
        (7 * sizeof(__be32) + NFS4_DEVICEID4_SIZE)
 
+/* on the wire size of a scsi commit range */
+#define PNFS_SCSI_RANGE_SIZE \
+       (4 * sizeof(__be32))
+
+enum scsi_code_set {
+       PS_CODE_SET_BINARY      = 1,
+       PS_CODE_SET_ASCII       = 2,
+       PS_CODE_SET_UTF8        = 3
+};
+
+enum scsi_designator_type {
+       PS_DESIGNATOR_T10       = 1,
+       PS_DESIGNATOR_EUI64     = 2,
+       PS_DESIGNATOR_NAA       = 3,
+       PS_DESIGNATOR_NAME      = 8
+};
+
 #define NFL4_UFLG_MASK                 0x0000003F
 #define NFL4_UFLG_DENSE                        0x00000001
 #define NFL4_UFLG_COMMIT_THRU_MDS      0x00000002