Merge tag 'nfs-for-4.8-4' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
author: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 12 Sep 2016 21:13:45 +0000 (14:13 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 12 Sep 2016 21:13:45 +0000 (14:13 -0700)
Pull NFS client bugfixes from Trond Myklebust:
 "Highlights include:

  Stable patches:
   - We must serialise LAYOUTGET and LAYOUTRETURN to ensure correct
     state accounting
   - Fix the CREATE_SESSION slot number

  Bugfixes:
   - sunrpc: fix a UDP memory accounting regression
   - NFS: Fix an error reporting regression in nfs_file_write()
   - pNFS: Fix further layout stateid issues
   - RPC/rdma: Revert 3d4cf35bd4fa ("xprtrdma: Reply buffer
     exhaustion...")
   - RPC/rdma: Fix receive buffer accounting"

* tag 'nfs-for-4.8-4' of git://git.linux-nfs.org/projects/trondmy/linux-nfs:
  NFSv4.1: Fix the CREATE_SESSION slot number accounting
  xprtrdma: Fix receive buffer accounting
  xprtrdma: Revert 3d4cf35bd4fa ("xprtrdma: Reply buffer exhaustion...")
  pNFS: Don't forget the layout stateid if there are outstanding LAYOUTGETs
  pNFS: Clear out all layout segments if the server unsets lrp->res.lrs_present
  pNFS: Fix pnfs_set_layout_stateid() to clear NFS_LAYOUT_INVALID_STID
  pNFS: Ensure LAYOUTGET and LAYOUTRETURN are properly serialised
  NFS: Fix error reporting in nfs_file_write()
  sunrpc: fix UDP memory accounting

fs/nfs/file.c
fs/nfs/nfs4proc.c
fs/nfs/pnfs.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c

index 7d62097..ca699dd 100644 (file)
@@ -657,7 +657,10 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
        if (result <= 0)
                goto out;
 
-       written = generic_write_sync(iocb, result);
+       result = generic_write_sync(iocb, result);
+       if (result < 0)
+               goto out;
+       written = result;
        iocb->ki_pos += written;
 
        /* Return error values */
index f5aecaa..a9dec32 100644 (file)
@@ -7570,12 +7570,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
        status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        trace_nfs4_create_session(clp, status);
 
+       switch (status) {
+       case -NFS4ERR_STALE_CLIENTID:
+       case -NFS4ERR_DELAY:
+       case -ETIMEDOUT:
+       case -EACCES:
+       case -EAGAIN:
+               goto out;
+       };
+
+       clp->cl_seqid++;
        if (!status) {
                /* Verify the session's negotiated channel_attrs values */
                status = nfs4_verify_channel_attrs(&args, &res);
                /* Increment the clientid slot sequence id */
-               if (clp->cl_seqid == res.seqid)
-                       clp->cl_seqid++;
                if (status)
                        goto out;
                nfs4_update_session(session, &res);
@@ -8190,10 +8198,13 @@ static void nfs4_layoutreturn_release(void *calldata)
 
        dprintk("--> %s\n", __func__);
        spin_lock(&lo->plh_inode->i_lock);
-       pnfs_mark_matching_lsegs_invalid(lo, &freeme, &lrp->args.range,
-                       be32_to_cpu(lrp->args.stateid.seqid));
-       if (lrp->res.lrs_present && pnfs_layout_is_valid(lo))
+       if (lrp->res.lrs_present) {
+               pnfs_mark_matching_lsegs_invalid(lo, &freeme,
+                               &lrp->args.range,
+                               be32_to_cpu(lrp->args.stateid.seqid));
                pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+       } else
+               pnfs_mark_layout_stateid_invalid(lo, &freeme);
        pnfs_clear_layoutreturn_waitbit(lo);
        spin_unlock(&lo->plh_inode->i_lock);
        nfs4_sequence_free_slot(&lrp->res.seq_res);
index 6daf034..2c93a85 100644 (file)
@@ -365,7 +365,8 @@ pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
        /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
        atomic_dec(&lo->plh_refcount);
        if (list_empty(&lo->plh_segs)) {
-               set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+               if (atomic_read(&lo->plh_outstanding) == 0)
+                       set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
                clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
        }
        rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
@@ -768,17 +769,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
        pnfs_destroy_layouts_byclid(clp, false);
 }
 
+static void
+pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
+{
+       lo->plh_return_iomode = 0;
+       lo->plh_return_seq = 0;
+       clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
+}
+
 /* update lo->plh_stateid with new if is more recent */
 void
 pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
                        bool update_barrier)
 {
        u32 oldseq, newseq, new_barrier = 0;
-       bool invalid = !pnfs_layout_is_valid(lo);
 
        oldseq = be32_to_cpu(lo->plh_stateid.seqid);
        newseq = be32_to_cpu(new->seqid);
-       if (invalid || pnfs_seqid_is_newer(newseq, oldseq)) {
+
+       if (!pnfs_layout_is_valid(lo)) {
+               nfs4_stateid_copy(&lo->plh_stateid, new);
+               lo->plh_barrier = newseq;
+               pnfs_clear_layoutreturn_info(lo);
+               clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+               return;
+       }
+       if (pnfs_seqid_is_newer(newseq, oldseq)) {
                nfs4_stateid_copy(&lo->plh_stateid, new);
                /*
                 * Because of wraparound, we want to keep the barrier
@@ -790,7 +806,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
                new_barrier = be32_to_cpu(new->seqid);
        else if (new_barrier == 0)
                return;
-       if (invalid || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
+       if (pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
                lo->plh_barrier = new_barrier;
 }
 
@@ -886,19 +902,14 @@ void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
        rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq);
 }
 
-static void
-pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
-{
-       lo->plh_return_iomode = 0;
-       lo->plh_return_seq = 0;
-       clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
-}
-
 static bool
 pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo,
                nfs4_stateid *stateid,
                enum pnfs_iomode *iomode)
 {
+       /* Serialise LAYOUTGET/LAYOUTRETURN */
+       if (atomic_read(&lo->plh_outstanding) != 0)
+               return false;
        if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
                return false;
        pnfs_get_layout_hdr(lo);
@@ -1798,16 +1809,11 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
                 */
                pnfs_mark_layout_stateid_invalid(lo, &free_me);
 
-               nfs4_stateid_copy(&lo->plh_stateid, &res->stateid);
-               lo->plh_barrier = be32_to_cpu(res->stateid.seqid);
+               pnfs_set_layout_stateid(lo, &res->stateid, true);
        }
 
        pnfs_get_lseg(lseg);
        pnfs_layout_insert_lseg(lo, lseg, &free_me);
-       if (!pnfs_layout_is_valid(lo)) {
-               pnfs_clear_layoutreturn_info(lo);
-               clear_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
-       }
 
 
        if (res->return_on_close)
index 536d0be..799cce6 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/prefetch.h>
 #include <linux/sunrpc/addr.h>
+#include <linux/sunrpc/svc_rdma.h>
 #include <asm/bitops.h>
 #include <linux/module.h> /* try_module_get()/module_put() */
 
@@ -923,7 +924,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
        }
 
        INIT_LIST_HEAD(&buf->rb_recv_bufs);
-       for (i = 0; i < buf->rb_max_requests; i++) {
+       for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
                struct rpcrdma_rep *rep;
 
                rep = rpcrdma_create_rep(r_xprt);
@@ -1018,6 +1019,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
                rep = rpcrdma_buffer_get_rep_locked(buf);
                rpcrdma_destroy_rep(ia, rep);
        }
+       buf->rb_send_count = 0;
 
        spin_lock(&buf->rb_reqslock);
        while (!list_empty(&buf->rb_allreqs)) {
@@ -1032,6 +1034,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
                spin_lock(&buf->rb_reqslock);
        }
        spin_unlock(&buf->rb_reqslock);
+       buf->rb_recv_count = 0;
 
        rpcrdma_destroy_mrs(buf);
 }
@@ -1074,8 +1077,27 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
        spin_unlock(&buf->rb_mwlock);
 }
 
+static struct rpcrdma_rep *
+rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
+{
+       /* If an RPC previously completed without a reply (say, a
+        * credential problem or a soft timeout occurs) then hold off
+        * on supplying more Receive buffers until the number of new
+        * pending RPCs catches up to the number of posted Receives.
+        */
+       if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
+               return NULL;
+
+       if (unlikely(list_empty(&buffers->rb_recv_bufs)))
+               return NULL;
+       buffers->rb_recv_count++;
+       return rpcrdma_buffer_get_rep_locked(buffers);
+}
+
 /*
  * Get a set of request/reply buffers.
+ *
+ * Reply buffer (if available) is attached to send buffer upon return.
  */
 struct rpcrdma_req *
 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
@@ -1085,21 +1107,15 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
        spin_lock(&buffers->rb_lock);
        if (list_empty(&buffers->rb_send_bufs))
                goto out_reqbuf;
+       buffers->rb_send_count++;
        req = rpcrdma_buffer_get_req_locked(buffers);
-       if (list_empty(&buffers->rb_recv_bufs))
-               goto out_repbuf;
-       req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+       req->rl_reply = rpcrdma_buffer_get_rep(buffers);
        spin_unlock(&buffers->rb_lock);
        return req;
 
 out_reqbuf:
        spin_unlock(&buffers->rb_lock);
-       pr_warn("rpcrdma: out of request buffers (%p)\n", buffers);
-       return NULL;
-out_repbuf:
-       list_add(&req->rl_free, &buffers->rb_send_bufs);
-       spin_unlock(&buffers->rb_lock);
-       pr_warn("rpcrdma: out of reply buffers (%p)\n", buffers);
+       pr_warn("RPC:       %s: out of request buffers\n", __func__);
        return NULL;
 }
 
@@ -1117,9 +1133,12 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
        req->rl_reply = NULL;
 
        spin_lock(&buffers->rb_lock);
+       buffers->rb_send_count--;
        list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
-       if (rep)
+       if (rep) {
+               buffers->rb_recv_count--;
                list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
+       }
        spin_unlock(&buffers->rb_lock);
 }
 
@@ -1133,8 +1152,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
        struct rpcrdma_buffer *buffers = req->rl_buffer;
 
        spin_lock(&buffers->rb_lock);
-       if (!list_empty(&buffers->rb_recv_bufs))
-               req->rl_reply = rpcrdma_buffer_get_rep_locked(buffers);
+       req->rl_reply = rpcrdma_buffer_get_rep(buffers);
        spin_unlock(&buffers->rb_lock);
 }
 
@@ -1148,6 +1166,7 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
        struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;
 
        spin_lock(&buffers->rb_lock);
+       buffers->rb_recv_count--;
        list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
        spin_unlock(&buffers->rb_lock);
 }
index 670fad5..a71b0f5 100644 (file)
@@ -321,6 +321,7 @@ struct rpcrdma_buffer {
        char                    *rb_pool;
 
        spinlock_t              rb_lock;        /* protect buf lists */
+       int                     rb_send_count, rb_recv_count;
        struct list_head        rb_send_bufs;
        struct list_head        rb_recv_bufs;
        u32                     rb_max_requests;
index 8ede3bc..bf16883 100644 (file)
@@ -1074,7 +1074,7 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
                skb = skb_recv_datagram(sk, 0, 1, &err);
                if (skb != NULL) {
                        xs_udp_data_read_skb(&transport->xprt, sk, skb);
-                       skb_free_datagram(sk, skb);
+                       skb_free_datagram_locked(sk, skb);
                        continue;
                }
                if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))