4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
27 * Copyright (c) 2011, 2012, Intel Corporation.
30 * This file is part of Lustre, http://www.lustre.org/
31 * Lustre is a trademark of Sun Microsystems, Inc.
33 * lustre/include/lustre_mdc.h
35 * MDS data structures.
36 * See also lustre_idl.h for wire formats of requests.
48 #include <linux/dcache.h>
49 #include "lustre_intent.h"
50 #include "lustre_handles.h"
51 #include "../../include/linux/libcfs/libcfs.h"
52 #include "obd_class.h"
53 #include "lustre/lustre_idl.h"
54 #include "lustre_lib.h"
55 #include "lustre_dlm.h"
56 #include "lustre_export.h"
60 struct ptlrpc_request;
64 * Serializes in-flight MDT-modifying RPC requests to preserve idempotency.
66 * This mutex is used to implement execute-once semantics on the MDT.
67 * The MDT stores the last transaction ID and result for every client in
68 * its last_rcvd file. If the client doesn't get a reply, it can safely
69 * resend the request and the MDT will reconstruct the reply being aware
70 * that the request has already been executed. Without this lock,
71 * execution status of concurrent in-flight requests would be
74 * This design limits the extent to which we can keep a full pipeline of
75 * in-flight requests from a single client. This limitation could be
76 * overcome by allowing multiple slots per client in the last_rcvd file.
79 /** Lock protecting in-flight RPC concurrency. */
80 struct mutex rpcl_mutex;
81 /** Intent associated with currently executing request. */
82 struct lookup_intent *rpcl_it;
83 /** Used for MDS/RPC load testing purposes. */
/*
 * Sentinel stored in mdc_rpc_lock::rpcl_it while fault-injected requests
 * (OBD_FAIL_MDC_RPCS_SEM) are in flight. It is only ever compared by value
 * and must never be dereferenced.
 */
#define MDC_FAKE_RPCL_IT ((void *)0x2c0012bfUL)
89 static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck)
91 mutex_init(&lck->rpcl_mutex);
95 static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck,
96 struct lookup_intent *it)
98 if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
99 it->it_op == IT_LAYOUT))
102 /* This would normally block until the existing request finishes.
103 * If fail_loc is set it will block until the regular request is
104 * done, then set rpcl_it to MDC_FAKE_RPCL_IT. Once that is set
105 * it will only be cleared when all fake requests are finished.
106 * Only when all fake requests are finished can normal requests
107 * be sent, to ensure they are recoverable again.
110 mutex_lock(&lck->rpcl_mutex);
112 if (CFS_FAIL_CHECK_QUIET(OBD_FAIL_MDC_RPCS_SEM)) {
113 lck->rpcl_it = MDC_FAKE_RPCL_IT;
115 mutex_unlock(&lck->rpcl_mutex);
119 /* This will only happen when the CFS_FAIL_CHECK() was
120 * just turned off but there are still requests in progress.
121 * Wait until they finish. It doesn't need to be efficient
122 * in this extremely rare case, just have low overhead in
123 * the common case when it isn't true.
125 while (unlikely(lck->rpcl_it == MDC_FAKE_RPCL_IT)) {
126 mutex_unlock(&lck->rpcl_mutex);
127 schedule_timeout(cfs_time_seconds(1) / 4);
131 LASSERT(!lck->rpcl_it);
135 static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck,
136 struct lookup_intent *it)
138 if (it && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
139 it->it_op == IT_LAYOUT))
142 if (lck->rpcl_it == MDC_FAKE_RPCL_IT) { /* OBD_FAIL_MDC_RPCS_SEM */
143 mutex_lock(&lck->rpcl_mutex);
145 LASSERTF(lck->rpcl_fakes > 0, "%d\n", lck->rpcl_fakes);
148 if (lck->rpcl_fakes == 0)
152 LASSERTF(it == lck->rpcl_it, "%p != %p\n", it, lck->rpcl_it);
156 mutex_unlock(&lck->rpcl_mutex);
159 /* Update the maximum observed easize and cookiesize. The default easize
160 * and cookiesize is initialized to the minimum value but allowed to grow
161 * up to a single page in size if required to handle the common case.
163 static inline void mdc_update_max_ea_from_body(struct obd_export *exp,
164 struct mdt_body *body)
166 if (body->valid & OBD_MD_FLMODEASIZE) {
167 struct client_obd *cli = &exp->exp_obd->u.cli;
169 if (cli->cl_max_mds_easize < body->max_mdsize) {
170 cli->cl_max_mds_easize = body->max_mdsize;
171 cli->cl_default_mds_easize =
172 min_t(__u32, body->max_mdsize, PAGE_SIZE);
174 if (cli->cl_max_mds_cookiesize < body->max_cookiesize) {
175 cli->cl_max_mds_cookiesize = body->max_cookiesize;
176 cli->cl_default_mds_cookiesize =
177 min_t(__u32, body->max_cookiesize, PAGE_SIZE);
182 struct mdc_cache_waiter {
183 struct list_head mcw_entry;
184 wait_queue_head_t mcw_waitq;
/* mdc/mdc_locks.c */
/*
 * NOTE(review): only the prototype is visible here. Presumably this reports
 * the error recorded in \a it for the given open \a phase; confirm against
 * the definition in mdc/mdc_locks.c.
 */
int it_open_error(int phase, struct lookup_intent *it);
190 static inline bool cl_is_lov_delay_create(unsigned int flags)
192 return (flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE;
195 static inline void cl_lov_delay_create_clear(unsigned int *flags)
197 if ((*flags & O_LOV_DELAY_CREATE) == O_LOV_DELAY_CREATE)
198 *flags &= ~O_LOV_DELAY_CREATE;