4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
31 * Copyright (c) 2011, 2012, Intel Corporation.
34 * This file is part of Lustre, http://www.lustre.org/
35 * Lustre is a trademark of Sun Microsystems, Inc.
37 * lustre/include/lustre_mdc.h
39 * MDS data structures.
40 * See also lustre_idl.h for wire formats of requests.
52 #include <linux/dcache.h>
53 #include "lustre_intent.h"
54 #include "lustre_handles.h"
55 #include "../../include/linux/libcfs/libcfs.h"
56 #include "obd_class.h"
57 #include "lustre/lustre_idl.h"
58 #include "lustre_lib.h"
59 #include "lustre_dlm.h"
60 #include "lustre_export.h"
64 struct ptlrpc_request;
68 * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
70 * This mutex is used to implement execute-once semantics on the MDT.
71 * The MDT stores the last transaction ID and result for every client in
72 * its last_rcvd file. If the client doesn't get a reply, it can safely
73 * resend the request and the MDT will reconstruct the reply being aware
74 * that the request has already been executed. Without this lock,
75 * execution status of concurrent in-flight requests would be overwritten.
78 * This design limits the extent to which we can keep a full pipeline of
79 * in-flight requests from a single client. This limitation could be
80 * overcome by allowing multiple slots per client in the last_rcvd file.
/*
 * Members of the RPC serialization lock that the mdc_*_rpc_lock() helpers in
 * this file reach through a struct mdc_rpc_lock pointer.
 * NOTE(review): the struct header, its closing brace and the rpcl_fakes
 * counter used by the helpers are not visible in this excerpt - confirm
 * against the complete header.
 */
83 /** Lock protecting in-flight RPC concurrency. */
84 struct mutex rpcl_mutex;
85 /** Intent associated with currently executing request. */
86 struct lookup_intent *rpcl_it;
87 /** Used for MDS/RPC load testing purposes. */
/*
 * Sentinel pointer value stored in rpcl_it when the OBD_FAIL_MDC_RPCS_SEM
 * fail check fires. The helpers in this file only assign it and compare
 * against it; they never dereference it.
 */
91 #define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL)
/*
 * Prepare an mdc_rpc_lock for use by initializing its mutex.
 *
 * @lck: lock to initialize
 *
 * NOTE(review): the body is only partly visible in this excerpt; whether
 * other members are initialized here too cannot be told - confirm against
 * the complete header.
 */
93 static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
95 mutex_init(&lck->rpcl_mutex);
/*
 * Take the RPC serialization lock on behalf of the request described by @it.
 *
 * @lck: lock to acquire
 * @it:  intent of the request; NULL is tolerated by the op check below
 *
 * Intents whose op is IT_GETATTR, IT_LOOKUP or IT_LAYOUT are tested for
 * before anything else. NOTE(review): the statement guarded by that test is
 * not visible in this excerpt; presumably those requests skip the lock
 * entirely - confirm.
 *
 * After taking rpcl_mutex there are two visible paths:
 *  - if CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM) fires, rpcl_it is set to
 *    MDC_FAKE_RPCL_IT and the mutex is released again;
 *  - otherwise, while rpcl_it still equals MDC_FAKE_RPCL_IT, the mutex is
 *    dropped and schedule_timeout(cfs_time_seconds(1) / 4) is called; once
 *    past that loop, LASSERT(!lck->rpcl_it) checks that no intent is
 *    currently recorded.
 *
 * NOTE(review): no task-state change is visible before schedule_timeout();
 * if the task is still runnable at that point the call may return without
 * sleeping - verify against the complete source. The retry after the
 * timeout and the final bookkeeping are likewise not visible here.
 */
99 static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
100 struct lookup_intent *it)
102 if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
103 it->it_op == IT_LAYOUT))
106 /* This would normally block until the existing request finishes.
107 * If fail_loc is set it will block until the regular request is
108 * done, then set rpcl_it to MDC_FAKE_RPCL_IT. Once that is set
109 * it will only be cleared when all fake requests are finished.
110 * Only when all fake requests are finished can normal requests
111 * be sent, to ensure they are recoverable again.
114 mutex_lock(&lck->rpcl_mutex);
116 if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) {
117 lck->rpcl_it = MDC_FAKE_RPCL_IT;
119 mutex_unlock(&lck->rpcl_mutex);
123 /* This will only happen when the CFS_FAIL_CHECK() was
124 * just turned off but there are still requests in progress.
125 * Wait until they finish. It doesn't need to be efficient
126 * in this extremely rare case, just have low overhead in
127 * the common case when it isn't true.
129 while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) {
130 mutex_unlock(&lck->rpcl_mutex);
131 schedule_timeout(cfs_time_seconds(1) / 4);
135 LASSERT(!lck->rpcl_it);
/*
 * Release the RPC serialization lock for the request described by @it.
 *
 * @lck: lock to release
 * @it:  intent of the finishing request; NULL is tolerated by the op check
 *
 * The same IT_GETATTR / IT_LOOKUP / IT_LAYOUT test as on the acquire side
 * comes first. NOTE(review): the statement it guards is not visible in this
 * excerpt; presumably those requests never took the lock - confirm.
 *
 * If rpcl_it holds MDC_FAKE_RPCL_IT, rpcl_mutex is taken here and
 * rpcl_fakes is asserted to be positive, then compared against zero.
 * Otherwise @it is asserted to be the intent recorded in rpcl_it. The
 * function ends by unlocking rpcl_mutex.
 *
 * NOTE(review): the statements that adjust rpcl_fakes and reset rpcl_it are
 * not visible in this excerpt - confirm against the complete source.
 */
139 static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
140 struct lookup_intent *it)
142 if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
143 it->it_op == IT_LAYOUT))
146 if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
147 mutex_lock(&lck->rpcl_mutex);
149 LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes);
152 if (lck->rpcl_fakes == 0)
156 LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it);
160 mutex_unlock(&lck->rpcl_mutex);
163 /* Update the maximum observed easize and cookiesize. The default easize
164 * and cookiesize is initialized to the minimum value but allowed to grow
165 * up to a single page in size if required to handle the common case.
167 static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
168 struct mdt_body *body)
170 if (body->valid & OBD_MD_FLMODEASIZE) {
171 struct client_obd *cli = &exp->exp_obd->u.cli;
173 if (cli->cl_max_mds_easize < body->max_mdsize) {
174 cli->cl_max_mds_easize = body->max_mdsize;
175 cli->cl_default_mds_easize =
176 min_t(__u32, body->max_mdsize, PAGE_SIZE);
178 if (cli->cl_max_mds_cookiesize < body->max_cookiesize) {
179 cli->cl_max_mds_cookiesize = body->max_cookiesize;
180 cli->cl_default_mds_cookiesize =
181 min_t(__u32, body->max_cookiesize, PAGE_SIZE);
/*
 * Waiter record made of a list linkage and a wait queue head.
 * NOTE(review): presumably linked into a list of waiters through mcw_entry
 * and woken through mcw_waitq; no user of this struct is visible in this
 * excerpt - confirm against the callers.
 */
186 struct mdc_cache_waiter {
187 struct list_head mcw_entry;
188 wait_queue_head_t mcw_waitq;
/*
 * Intent helpers declared here and, per the note below, implemented in
 * mdc/mdc_locks.c. Only the prototypes are visible in this excerpt.
 * NOTE(review): going by the names, the first three test, clear and set a
 * disposition flag on the intent, and it_open_error() reports an error for
 * a given phase of an open - confirm against the implementations.
 */
191 /* mdc/mdc_locks.c */
192 int it_disposition(struct lookup_intent *it, int flag);
193 void it_clear_disposition(struct lookup_intent *it, int flag);
194 void it_set_disposition(struct lookup_intent *it, int flag);
195 int it_open_error(int phase, struct lookup_intent *it);
197 static inline bool cl_is_lov_delay_create(unsigned int flags)
199 return (flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE;
202 static inline void cl_lov_delay_create_clear(unsigned int *flags)
204 if ((*flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE)
205 *flags &= ~O_LOV_DELAY_CREATE;