Merge branch 'for-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetoot...
[cascardo/linux.git] / drivers / staging / lustre / lustre / obdecho / echo_client.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2012, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  */
36
37 #define DEBUG_SUBSYSTEM S_ECHO
38 #include <linux/libcfs/libcfs.h>
39
40 #include <obd.h>
41 #include <obd_support.h>
42 #include <obd_class.h>
43 #include <lustre_debug.h>
44 #include <lprocfs_status.h>
45 #include <cl_object.h>
46 #include <md_object.h>
47 #include <lustre_fid.h>
48 #include <lustre_acl.h>
49 #include <lustre_net.h>
50 #include <obd_lov.h>
51
52 #include "echo_internal.h"
53
54 /** \defgroup echo_client Echo Client
55  * @{
56  */
57
58 struct echo_device {
59         struct cl_device        ed_cl;
60         struct echo_client_obd *ed_ec;
61
62         struct cl_site    ed_site_myself;
63         struct cl_site   *ed_site;
64         struct lu_device       *ed_next;
65         int                  ed_next_islov;
66         int                  ed_next_ismd;
67         struct lu_client_seq   *ed_cl_seq;
68 };
69
70 struct echo_object {
71         struct cl_object        eo_cl;
72         struct cl_object_header eo_hdr;
73
74         struct echo_device     *eo_dev;
75         struct list_head              eo_obj_chain;
76         struct lov_stripe_md   *eo_lsm;
77         atomic_t            eo_npages;
78         int                  eo_deleted;
79 };
80
81 struct echo_object_conf {
82         struct cl_object_conf  eoc_cl;
83         struct lov_stripe_md **eoc_md;
84 };
85
86 struct echo_page {
87         struct cl_page_slice   ep_cl;
88         struct mutex            ep_lock;
89         struct page         *ep_vmpage;
90 };
91
92 struct echo_lock {
93         struct cl_lock_slice   el_cl;
94         struct list_head             el_chain;
95         struct echo_object    *el_object;
96         __u64             el_cookie;
97         atomic_t           el_refcount;
98 };
99
100 struct echo_io {
101         struct cl_io_slice     ei_cl;
102 };
103
#if 0
/* Request slice; compiled out, like its slab cache further down. */
struct echo_req {
	struct cl_req_slice er_cl;
};
#endif
109
/* Defined later in the file; needed by echo_device_alloc(). */
static int echo_client_setup(const struct lu_env *env,
			     struct obd_device *obddev,
			     struct lustre_cfg *lcfg);

static int echo_client_cleanup(struct obd_device *obddev);
114
115
116 /** \defgroup echo_helpers Helper functions
117  * @{
118  */
119 static inline struct echo_device *cl2echo_dev(const struct cl_device *dev)
120 {
121         return container_of0(dev, struct echo_device, ed_cl);
122 }
123
124 static inline struct cl_device *echo_dev2cl(struct echo_device *d)
125 {
126         return &d->ed_cl;
127 }
128
129 static inline struct echo_device *obd2echo_dev(const struct obd_device *obd)
130 {
131         return cl2echo_dev(lu2cl_dev(obd->obd_lu_dev));
132 }
133
134 static inline struct cl_object *echo_obj2cl(struct echo_object *eco)
135 {
136         return &eco->eo_cl;
137 }
138
139 static inline struct echo_object *cl2echo_obj(const struct cl_object *o)
140 {
141         return container_of(o, struct echo_object, eo_cl);
142 }
143
144 static inline struct echo_page *cl2echo_page(const struct cl_page_slice *s)
145 {
146         return container_of(s, struct echo_page, ep_cl);
147 }
148
149 static inline struct echo_lock *cl2echo_lock(const struct cl_lock_slice *s)
150 {
151         return container_of(s, struct echo_lock, el_cl);
152 }
153
154 static inline struct cl_lock *echo_lock2cl(const struct echo_lock *ecl)
155 {
156         return ecl->el_cl.cls_lock;
157 }
158
159 static struct lu_context_key echo_thread_key;
160 static inline struct echo_thread_info *echo_env_info(const struct lu_env *env)
161 {
162         struct echo_thread_info *info;
163         info = lu_context_key_get(&env->le_ctx, &echo_thread_key);
164         LASSERT(info != NULL);
165         return info;
166 }
167
168 static inline
169 struct echo_object_conf *cl2echo_conf(const struct cl_object_conf *c)
170 {
171         return container_of(c, struct echo_object_conf, eoc_cl);
172 }
173
174 /** @} echo_helpers */
175
176 static struct echo_object *cl_echo_object_find(struct echo_device *d,
177                                                struct lov_stripe_md **lsm);
178 static int cl_echo_object_put(struct echo_object *eco);
179 static int cl_echo_enqueue   (struct echo_object *eco, obd_off start,
180                               obd_off end, int mode, __u64 *cookie);
181 static int cl_echo_cancel    (struct echo_device *d, __u64 cookie);
182 static int cl_echo_object_brw(struct echo_object *eco, int rw, obd_off offset,
183                               struct page **pages, int npages, int async);
184
185 static struct echo_thread_info *echo_env_info(const struct lu_env *env);
186
187 struct echo_thread_info {
188         struct echo_object_conf eti_conf;
189         struct lustre_md        eti_md;
190
191         struct cl_2queue        eti_queue;
192         struct cl_io        eti_io;
193         struct cl_lock_descr    eti_descr;
194         struct lu_fid      eti_fid;
195         struct lu_fid           eti_fid2;
196         struct md_op_spec       eti_spec;
197         struct lov_mds_md_v3    eti_lmm;
198         struct lov_user_md_v3   eti_lum;
199         struct md_attr    eti_ma;
200         struct lu_name    eti_lname;
201         /* per-thread values, can be re-used */
202         void                    *eti_big_lmm;
203         int                     eti_big_lmmsize;
204         char                eti_name[20];
205         struct lu_buf      eti_buf;
206         char                eti_xattr_buf[LUSTRE_POSIX_ACL_MAX_SIZE];
207 };
208
/*
 * Session data. No session state is used right now, so the structure only
 * holds a placeholder member.
 */
struct echo_session_info {
	unsigned long dummy;
};
213
214 static struct kmem_cache *echo_lock_kmem;
215 static struct kmem_cache *echo_object_kmem;
216 static struct kmem_cache *echo_thread_kmem;
217 static struct kmem_cache *echo_session_kmem;
218 //static struct kmem_cache *echo_req_kmem;
219
220 static struct lu_kmem_descr echo_caches[] = {
221         {
222                 .ckd_cache = &echo_lock_kmem,
223                 .ckd_name  = "echo_lock_kmem",
224                 .ckd_size  = sizeof (struct echo_lock)
225         },
226         {
227                 .ckd_cache = &echo_object_kmem,
228                 .ckd_name  = "echo_object_kmem",
229                 .ckd_size  = sizeof (struct echo_object)
230         },
231         {
232                 .ckd_cache = &echo_thread_kmem,
233                 .ckd_name  = "echo_thread_kmem",
234                 .ckd_size  = sizeof (struct echo_thread_info)
235         },
236         {
237                 .ckd_cache = &echo_session_kmem,
238                 .ckd_name  = "echo_session_kmem",
239                 .ckd_size  = sizeof (struct echo_session_info)
240         },
241 #if 0
242         {
243                 .ckd_cache = &echo_req_kmem,
244                 .ckd_name  = "echo_req_kmem",
245                 .ckd_size  = sizeof (struct echo_req)
246         },
247 #endif
248         {
249                 .ckd_cache = NULL
250         }
251 };
252
253 /** \defgroup echo_page Page operations
254  *
255  * Echo page operations.
256  *
257  * @{
258  */
259 static struct page *echo_page_vmpage(const struct lu_env *env,
260                                     const struct cl_page_slice *slice)
261 {
262         return cl2echo_page(slice)->ep_vmpage;
263 }
264
265 static int echo_page_own(const struct lu_env *env,
266                          const struct cl_page_slice *slice,
267                          struct cl_io *io, int nonblock)
268 {
269         struct echo_page *ep = cl2echo_page(slice);
270
271         if (!nonblock)
272                 mutex_lock(&ep->ep_lock);
273         else if (!mutex_trylock(&ep->ep_lock))
274                 return -EAGAIN;
275         return 0;
276 }
277
278 static void echo_page_disown(const struct lu_env *env,
279                              const struct cl_page_slice *slice,
280                              struct cl_io *io)
281 {
282         struct echo_page *ep = cl2echo_page(slice);
283
284         LASSERT(mutex_is_locked(&ep->ep_lock));
285         mutex_unlock(&ep->ep_lock);
286 }
287
288 static void echo_page_discard(const struct lu_env *env,
289                               const struct cl_page_slice *slice,
290                               struct cl_io *unused)
291 {
292         cl_page_delete(env, slice->cpl_page);
293 }
294
295 static int echo_page_is_vmlocked(const struct lu_env *env,
296                                  const struct cl_page_slice *slice)
297 {
298         if (mutex_is_locked(&cl2echo_page(slice)->ep_lock))
299                 return -EBUSY;
300         return -ENODATA;
301 }
302
303 static void echo_page_completion(const struct lu_env *env,
304                                  const struct cl_page_slice *slice,
305                                  int ioret)
306 {
307         LASSERT(slice->cpl_page->cp_sync_io != NULL);
308 }
309
310 static void echo_page_fini(const struct lu_env *env,
311                            struct cl_page_slice *slice)
312 {
313         struct echo_page *ep    = cl2echo_page(slice);
314         struct echo_object *eco = cl2echo_obj(slice->cpl_obj);
315         struct page *vmpage      = ep->ep_vmpage;
316
317         atomic_dec(&eco->eo_npages);
318         page_cache_release(vmpage);
319 }
320
/** cpo_prep method for reads and writes: no preparation needed, returns 0. */
static int echo_page_prep(const struct lu_env *env,
			  const struct cl_page_slice *slice,
			  struct cl_io *unused)
{
	int rc = 0;

	return rc;
}
327
328 static int echo_page_print(const struct lu_env *env,
329                            const struct cl_page_slice *slice,
330                            void *cookie, lu_printer_t printer)
331 {
332         struct echo_page *ep = cl2echo_page(slice);
333
334         (*printer)(env, cookie, LUSTRE_ECHO_CLIENT_NAME"-page@%p %d vm@%p\n",
335                    ep, mutex_is_locked(&ep->ep_lock), ep->ep_vmpage);
336         return 0;
337 }
338
339 static const struct cl_page_operations echo_page_ops = {
340         .cpo_own           = echo_page_own,
341         .cpo_disown     = echo_page_disown,
342         .cpo_discard       = echo_page_discard,
343         .cpo_vmpage     = echo_page_vmpage,
344         .cpo_fini         = echo_page_fini,
345         .cpo_print       = echo_page_print,
346         .cpo_is_vmlocked   = echo_page_is_vmlocked,
347         .io = {
348                 [CRT_READ] = {
349                         .cpo_prep       = echo_page_prep,
350                         .cpo_completion  = echo_page_completion,
351                 },
352                 [CRT_WRITE] = {
353                         .cpo_prep       = echo_page_prep,
354                         .cpo_completion  = echo_page_completion,
355                 }
356         }
357 };
358 /** @} echo_page */
359
360 /** \defgroup echo_lock Locking
361  *
362  * echo lock operations
363  *
364  * @{
365  */
366 static void echo_lock_fini(const struct lu_env *env,
367                            struct cl_lock_slice *slice)
368 {
369         struct echo_lock *ecl = cl2echo_lock(slice);
370
371         LASSERT(list_empty(&ecl->el_chain));
372         OBD_SLAB_FREE_PTR(ecl, echo_lock_kmem);
373 }
374
375 static void echo_lock_delete(const struct lu_env *env,
376                              const struct cl_lock_slice *slice)
377 {
378         struct echo_lock *ecl      = cl2echo_lock(slice);
379
380         LASSERT(list_empty(&ecl->el_chain));
381 }
382
/** clo_fits_into method: unconditionally returns 1 for every description. */
static int echo_lock_fits_into(const struct lu_env *env,
			       const struct cl_lock_slice *slice,
			       const struct cl_lock_descr *need,
			       const struct cl_io *unused)
{
	int fits = 1;

	return fits;
}
390
391 static struct cl_lock_operations echo_lock_ops = {
392         .clo_fini      = echo_lock_fini,
393         .clo_delete    = echo_lock_delete,
394         .clo_fits_into = echo_lock_fits_into
395 };
396
397 /** @} echo_lock */
398
399 /** \defgroup echo_cl_ops cl_object operations
400  *
401  * operations for cl_object
402  *
403  * @{
404  */
405 static int echo_page_init(const struct lu_env *env, struct cl_object *obj,
406                         struct cl_page *page, struct page *vmpage)
407 {
408         struct echo_page *ep = cl_object_page_slice(obj, page);
409         struct echo_object *eco = cl2echo_obj(obj);
410
411         ep->ep_vmpage = vmpage;
412         page_cache_get(vmpage);
413         mutex_init(&ep->ep_lock);
414         cl_page_slice_add(page, &ep->ep_cl, obj, &echo_page_ops);
415         atomic_inc(&eco->eo_npages);
416         return 0;
417 }
418
/** coo_io_init method: the echo layer adds no io state, returns 0. */
static int echo_io_init(const struct lu_env *env, struct cl_object *obj,
			struct cl_io *io)
{
	int rc = 0;

	return rc;
}
424
425 static int echo_lock_init(const struct lu_env *env,
426                           struct cl_object *obj, struct cl_lock *lock,
427                           const struct cl_io *unused)
428 {
429         struct echo_lock *el;
430
431         OBD_SLAB_ALLOC_PTR_GFP(el, echo_lock_kmem, __GFP_IO);
432         if (el != NULL) {
433                 cl_lock_slice_add(lock, &el->el_cl, obj, &echo_lock_ops);
434                 el->el_object = cl2echo_obj(obj);
435                 INIT_LIST_HEAD(&el->el_chain);
436                 atomic_set(&el->el_refcount, 0);
437         }
438         return el == NULL ? -ENOMEM : 0;
439 }
440
/** coo_conf_set method: configuration changes are ignored, returns 0. */
static int echo_conf_set(const struct lu_env *env, struct cl_object *obj,
			 const struct cl_object_conf *conf)
{
	int rc = 0;

	return rc;
}
446
447 static const struct cl_object_operations echo_cl_obj_ops = {
448         .coo_page_init = echo_page_init,
449         .coo_lock_init = echo_lock_init,
450         .coo_io_init   = echo_io_init,
451         .coo_conf_set  = echo_conf_set
452 };
453 /** @} echo_cl_ops */
454
455 /** \defgroup echo_lu_ops lu_object operations
456  *
457  * operations for echo lu object.
458  *
459  * @{
460  */
461 static int echo_object_init(const struct lu_env *env, struct lu_object *obj,
462                             const struct lu_object_conf *conf)
463 {
464         struct echo_device *ed   = cl2echo_dev(lu2cl_dev(obj->lo_dev));
465         struct echo_client_obd *ec     = ed->ed_ec;
466         struct echo_object *eco = cl2echo_obj(lu2cl(obj));
467
468         if (ed->ed_next) {
469                 struct lu_object  *below;
470                 struct lu_device  *under;
471
472                 under = ed->ed_next;
473                 below = under->ld_ops->ldo_object_alloc(env, obj->lo_header,
474                                                         under);
475                 if (below == NULL)
476                         return -ENOMEM;
477                 lu_object_add(obj, below);
478         }
479
480         if (!ed->ed_next_ismd) {
481                 const struct cl_object_conf *cconf = lu2cl_conf(conf);
482                 struct echo_object_conf *econf = cl2echo_conf(cconf);
483
484                 LASSERT(econf->eoc_md);
485                 eco->eo_lsm = *econf->eoc_md;
486                 /* clear the lsm pointer so that it won't get freed. */
487                 *econf->eoc_md = NULL;
488         } else {
489                 eco->eo_lsm = NULL;
490         }
491
492         eco->eo_dev = ed;
493         atomic_set(&eco->eo_npages, 0);
494         cl_object_page_init(lu2cl(obj), sizeof(struct echo_page));
495
496         spin_lock(&ec->ec_lock);
497         list_add_tail(&eco->eo_obj_chain, &ec->ec_objects);
498         spin_unlock(&ec->ec_lock);
499
500         return 0;
501 }
502
503 /* taken from osc_unpackmd() */
504 static int echo_alloc_memmd(struct echo_device *ed,
505                             struct lov_stripe_md **lsmp)
506 {
507         int lsm_size;
508
509         /* If export is lov/osc then use their obd method */
510         if (ed->ed_next != NULL)
511                 return obd_alloc_memmd(ed->ed_ec->ec_exp, lsmp);
512         /* OFD has no unpackmd method, do everything here */
513         lsm_size = lov_stripe_md_size(1);
514
515         LASSERT(*lsmp == NULL);
516         OBD_ALLOC(*lsmp, lsm_size);
517         if (*lsmp == NULL)
518                 return -ENOMEM;
519
520         OBD_ALLOC((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
521         if ((*lsmp)->lsm_oinfo[0] == NULL) {
522                 OBD_FREE(*lsmp, lsm_size);
523                 return -ENOMEM;
524         }
525
526         loi_init((*lsmp)->lsm_oinfo[0]);
527         (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES;
528         ostid_set_seq_echo(&(*lsmp)->lsm_oi);
529
530         return lsm_size;
531 }
532
533 static int echo_free_memmd(struct echo_device *ed, struct lov_stripe_md **lsmp)
534 {
535         int lsm_size;
536
537         /* If export is lov/osc then use their obd method */
538         if (ed->ed_next != NULL)
539                 return obd_free_memmd(ed->ed_ec->ec_exp, lsmp);
540         /* OFD has no unpackmd method, do everything here */
541         lsm_size = lov_stripe_md_size(1);
542
543         LASSERT(*lsmp != NULL);
544         OBD_FREE((*lsmp)->lsm_oinfo[0], sizeof(struct lov_oinfo));
545         OBD_FREE(*lsmp, lsm_size);
546         *lsmp = NULL;
547         return 0;
548 }
549
550 static void echo_object_free(const struct lu_env *env, struct lu_object *obj)
551 {
552         struct echo_object *eco    = cl2echo_obj(lu2cl(obj));
553         struct echo_client_obd *ec = eco->eo_dev->ed_ec;
554
555         LASSERT(atomic_read(&eco->eo_npages) == 0);
556
557         spin_lock(&ec->ec_lock);
558         list_del_init(&eco->eo_obj_chain);
559         spin_unlock(&ec->ec_lock);
560
561         lu_object_fini(obj);
562         lu_object_header_fini(obj->lo_header);
563
564         if (eco->eo_lsm)
565                 echo_free_memmd(eco->eo_dev, &eco->eo_lsm);
566         OBD_SLAB_FREE_PTR(eco, echo_object_kmem);
567 }
568
569 static int echo_object_print(const struct lu_env *env, void *cookie,
570                             lu_printer_t p, const struct lu_object *o)
571 {
572         struct echo_object *obj = cl2echo_obj(lu2cl(o));
573
574         return (*p)(env, cookie, "echoclient-object@%p", obj);
575 }
576
577 static const struct lu_object_operations echo_lu_obj_ops = {
578         .loo_object_init      = echo_object_init,
579         .loo_object_delete    = NULL,
580         .loo_object_release   = NULL,
581         .loo_object_free      = echo_object_free,
582         .loo_object_print     = echo_object_print,
583         .loo_object_invariant = NULL
584 };
585 /** @} echo_lu_ops */
586
587 /** \defgroup echo_lu_dev_ops  lu_device operations
588  *
589  * Operations for echo lu device.
590  *
591  * @{
592  */
593 static struct lu_object *echo_object_alloc(const struct lu_env *env,
594                                            const struct lu_object_header *hdr,
595                                            struct lu_device *dev)
596 {
597         struct echo_object *eco;
598         struct lu_object *obj = NULL;
599
600         /* we're the top dev. */
601         LASSERT(hdr == NULL);
602         OBD_SLAB_ALLOC_PTR_GFP(eco, echo_object_kmem, __GFP_IO);
603         if (eco != NULL) {
604                 struct cl_object_header *hdr = &eco->eo_hdr;
605
606                 obj = &echo_obj2cl(eco)->co_lu;
607                 cl_object_header_init(hdr);
608                 lu_object_init(obj, &hdr->coh_lu, dev);
609                 lu_object_add_top(&hdr->coh_lu, obj);
610
611                 eco->eo_cl.co_ops = &echo_cl_obj_ops;
612                 obj->lo_ops       = &echo_lu_obj_ops;
613         }
614         return obj;
615 }
616
617 static struct lu_device_operations echo_device_lu_ops = {
618         .ldo_object_alloc   = echo_object_alloc,
619 };
620
621 /** @} echo_lu_dev_ops */
622
623 static struct cl_device_operations echo_device_cl_ops = {
624 };
625
626 /** \defgroup echo_init Setup and teardown
627  *
628  * Init and fini functions for echo client.
629  *
630  * @{
631  */
632 static int echo_site_init(const struct lu_env *env, struct echo_device *ed)
633 {
634         struct cl_site *site = &ed->ed_site_myself;
635         int rc;
636
637         /* initialize site */
638         rc = cl_site_init(site, &ed->ed_cl);
639         if (rc) {
640                 CERROR("Cannot initialize site for echo client(%d)\n", rc);
641                 return rc;
642         }
643
644         rc = lu_site_init_finish(&site->cs_lu);
645         if (rc)
646                 return rc;
647
648         ed->ed_site = site;
649         return 0;
650 }
651
652 static void echo_site_fini(const struct lu_env *env, struct echo_device *ed)
653 {
654         if (ed->ed_site) {
655                 if (!ed->ed_next_ismd)
656                         cl_site_fini(ed->ed_site);
657                 ed->ed_site = NULL;
658         }
659 }
660
661 static void *echo_thread_key_init(const struct lu_context *ctx,
662                           struct lu_context_key *key)
663 {
664         struct echo_thread_info *info;
665
666         OBD_SLAB_ALLOC_PTR_GFP(info, echo_thread_kmem, __GFP_IO);
667         if (info == NULL)
668                 info = ERR_PTR(-ENOMEM);
669         return info;
670 }
671
672 static void echo_thread_key_fini(const struct lu_context *ctx,
673                          struct lu_context_key *key, void *data)
674 {
675         struct echo_thread_info *info = data;
676         OBD_SLAB_FREE_PTR(info, echo_thread_kmem);
677 }
678
/** lct_exit method of echo_thread_key: intentionally does nothing. */
static void echo_thread_key_exit(const struct lu_context *ctx,
			 struct lu_context_key *key, void *data)
{
	/* nothing to reset between uses */
}
683
684 static struct lu_context_key echo_thread_key = {
685         .lct_tags = LCT_CL_THREAD,
686         .lct_init = echo_thread_key_init,
687         .lct_fini = echo_thread_key_fini,
688         .lct_exit = echo_thread_key_exit
689 };
690
691 static void *echo_session_key_init(const struct lu_context *ctx,
692                                   struct lu_context_key *key)
693 {
694         struct echo_session_info *session;
695
696         OBD_SLAB_ALLOC_PTR_GFP(session, echo_session_kmem, __GFP_IO);
697         if (session == NULL)
698                 session = ERR_PTR(-ENOMEM);
699         return session;
700 }
701
702 static void echo_session_key_fini(const struct lu_context *ctx,
703                                  struct lu_context_key *key, void *data)
704 {
705         struct echo_session_info *session = data;
706         OBD_SLAB_FREE_PTR(session, echo_session_kmem);
707 }
708
/** lct_exit method of echo_session_key: intentionally does nothing. */
static void echo_session_key_exit(const struct lu_context *ctx,
				 struct lu_context_key *key, void *data)
{
	/* nothing to reset between uses */
}
713
714 static struct lu_context_key echo_session_key = {
715         .lct_tags = LCT_SESSION,
716         .lct_init = echo_session_key_init,
717         .lct_fini = echo_session_key_fini,
718         .lct_exit = echo_session_key_exit
719 };
720
721 LU_TYPE_INIT_FINI(echo, &echo_thread_key, &echo_session_key);
722
723 #define ECHO_SEQ_WIDTH 0xffffffff
724 static int echo_fid_init(struct echo_device *ed, char *obd_name,
725                          struct seq_server_site *ss)
726 {
727         char *prefix;
728         int rc;
729
730         OBD_ALLOC_PTR(ed->ed_cl_seq);
731         if (ed->ed_cl_seq == NULL)
732                 return -ENOMEM;
733
734         OBD_ALLOC(prefix, MAX_OBD_NAME + 5);
735         if (prefix == NULL)
736                 GOTO(out_free_seq, rc = -ENOMEM);
737
738         snprintf(prefix, MAX_OBD_NAME + 5, "srv-%s", obd_name);
739
740         /* Init client side sequence-manager */
741         rc = seq_client_init(ed->ed_cl_seq, NULL,
742                              LUSTRE_SEQ_METADATA,
743                              prefix, ss->ss_server_seq);
744         ed->ed_cl_seq->lcs_width = ECHO_SEQ_WIDTH;
745         OBD_FREE(prefix, MAX_OBD_NAME + 5);
746         if (rc)
747                 GOTO(out_free_seq, rc);
748
749         return 0;
750
751 out_free_seq:
752         OBD_FREE_PTR(ed->ed_cl_seq);
753         ed->ed_cl_seq = NULL;
754         return rc;
755 }
756
757 static int echo_fid_fini(struct obd_device *obddev)
758 {
759         struct echo_device *ed = obd2echo_dev(obddev);
760
761         if (ed->ed_cl_seq != NULL) {
762                 seq_client_fini(ed->ed_cl_seq);
763                 OBD_FREE_PTR(ed->ed_cl_seq);
764                 ed->ed_cl_seq = NULL;
765         }
766
767         return 0;
768 }
769
770 static struct lu_device *echo_device_alloc(const struct lu_env *env,
771                                            struct lu_device_type *t,
772                                            struct lustre_cfg *cfg)
773 {
774         struct lu_device   *next;
775         struct echo_device *ed;
776         struct cl_device   *cd;
777         struct obd_device  *obd = NULL; /* to keep compiler happy */
778         struct obd_device  *tgt;
779         const char *tgt_type_name;
780         int rc;
781         int cleanup = 0;
782
783         OBD_ALLOC_PTR(ed);
784         if (ed == NULL)
785                 GOTO(out, rc = -ENOMEM);
786
787         cleanup = 1;
788         cd = &ed->ed_cl;
789         rc = cl_device_init(cd, t);
790         if (rc)
791                 GOTO(out, rc);
792
793         cd->cd_lu_dev.ld_ops = &echo_device_lu_ops;
794         cd->cd_ops = &echo_device_cl_ops;
795
796         cleanup = 2;
797         obd = class_name2obd(lustre_cfg_string(cfg, 0));
798         LASSERT(obd != NULL);
799         LASSERT(env != NULL);
800
801         tgt = class_name2obd(lustre_cfg_string(cfg, 1));
802         if (tgt == NULL) {
803                 CERROR("Can not find tgt device %s\n",
804                         lustre_cfg_string(cfg, 1));
805                 GOTO(out, rc = -ENODEV);
806         }
807
808         next = tgt->obd_lu_dev;
809         if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
810                 ed->ed_next_ismd = 1;
811         } else {
812                 ed->ed_next_ismd = 0;
813                 rc = echo_site_init(env, ed);
814                 if (rc)
815                         GOTO(out, rc);
816         }
817         cleanup = 3;
818
819         rc = echo_client_setup(env, obd, cfg);
820         if (rc)
821                 GOTO(out, rc);
822
823         ed->ed_ec = &obd->u.echo_client;
824         cleanup = 4;
825
826         if (ed->ed_next_ismd) {
827                 /* Suppose to connect to some Metadata layer */
828                 struct lu_site *ls;
829                 struct lu_device *ld;
830                 int    found = 0;
831
832                 if (next == NULL) {
833                         CERROR("%s is not lu device type!\n",
834                                lustre_cfg_string(cfg, 1));
835                         GOTO(out, rc = -EINVAL);
836                 }
837
838                 tgt_type_name = lustre_cfg_string(cfg, 2);
839                 if (!tgt_type_name) {
840                         CERROR("%s no type name for echo %s setup\n",
841                                 lustre_cfg_string(cfg, 1),
842                                 tgt->obd_type->typ_name);
843                         GOTO(out, rc = -EINVAL);
844                 }
845
846                 ls = next->ld_site;
847
848                 spin_lock(&ls->ls_ld_lock);
849                 list_for_each_entry(ld, &ls->ls_ld_linkage, ld_linkage) {
850                         if (strcmp(ld->ld_type->ldt_name, tgt_type_name) == 0) {
851                                 found = 1;
852                                 break;
853                         }
854                 }
855                 spin_unlock(&ls->ls_ld_lock);
856
857                 if (found == 0) {
858                         CERROR("%s is not lu device type!\n",
859                                lustre_cfg_string(cfg, 1));
860                         GOTO(out, rc = -EINVAL);
861                 }
862
863                 next = ld;
864                 /* For MD echo client, it will use the site in MDS stack */
865                 ed->ed_site_myself.cs_lu = *ls;
866                 ed->ed_site = &ed->ed_site_myself;
867                 ed->ed_cl.cd_lu_dev.ld_site = &ed->ed_site_myself.cs_lu;
868                 rc = echo_fid_init(ed, obd->obd_name, lu_site2seq(ls));
869                 if (rc) {
870                         CERROR("echo fid init error %d\n", rc);
871                         GOTO(out, rc);
872                 }
873         } else {
874                  /* if echo client is to be stacked upon ost device, the next is
875                   * NULL since ost is not a clio device so far */
876                 if (next != NULL && !lu_device_is_cl(next))
877                         next = NULL;
878
879                 tgt_type_name = tgt->obd_type->typ_name;
880                 if (next != NULL) {
881                         LASSERT(next != NULL);
882                         if (next->ld_site != NULL)
883                                 GOTO(out, rc = -EBUSY);
884
885                         next->ld_site = &ed->ed_site->cs_lu;
886                         rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
887                                                      next->ld_type->ldt_name,
888                                                      NULL);
889                         if (rc)
890                                 GOTO(out, rc);
891
892                         /* Tricky case, I have to determine the obd type since
893                          * CLIO uses the different parameters to initialize
894                          * objects for lov & osc. */
895                         if (strcmp(tgt_type_name, LUSTRE_LOV_NAME) == 0)
896                                 ed->ed_next_islov = 1;
897                         else
898                                 LASSERT(strcmp(tgt_type_name,
899                                                LUSTRE_OSC_NAME) == 0);
900                 } else
901                         LASSERT(strcmp(tgt_type_name, LUSTRE_OST_NAME) == 0);
902         }
903
904         ed->ed_next = next;
905         return &cd->cd_lu_dev;
906 out:
907         switch(cleanup) {
908         case 4: {
909                 int rc2;
910                 rc2 = echo_client_cleanup(obd);
911                 if (rc2)
912                         CERROR("Cleanup obd device %s error(%d)\n",
913                                obd->obd_name, rc2);
914         }
915
916         case 3:
917                 echo_site_fini(env, ed);
918         case 2:
919                 cl_device_fini(&ed->ed_cl);
920         case 1:
921                 OBD_FREE_PTR(ed);
922         case 0:
923         default:
924                 break;
925         }
926         return(ERR_PTR(rc));
927 }
928
/**
 * lu_device_type_operations::ldto_device_init method of the echo client.
 *
 * Reaching this method is treated as a bug: it triggers LBUG() right away.
 * The trailing return only satisfies the function signature.
 */
static int echo_device_init(const struct lu_env *env, struct lu_device *d,
                            const char *name, struct lu_device *next)
{
        LBUG();

        return 0;
}
935
936 static struct lu_device *echo_device_fini(const struct lu_env *env,
937                                           struct lu_device *d)
938 {
939         struct echo_device *ed = cl2echo_dev(lu2cl_dev(d));
940         struct lu_device *next = ed->ed_next;
941
942         while (next && !ed->ed_next_ismd)
943                 next = next->ld_type->ldt_ops->ldto_device_fini(env, next);
944         return NULL;
945 }
946
947 static void echo_lock_release(const struct lu_env *env,
948                               struct echo_lock *ecl,
949                               int still_used)
950 {
951         struct cl_lock *clk = echo_lock2cl(ecl);
952
953         cl_lock_get(clk);
954         cl_unuse(env, clk);
955         cl_lock_release(env, clk, "ec enqueue", ecl->el_object);
956         if (!still_used) {
957                 cl_lock_mutex_get(env, clk);
958                 cl_lock_cancel(env, clk);
959                 cl_lock_delete(env, clk);
960                 cl_lock_mutex_put(env, clk);
961         }
962         cl_lock_put(env, clk);
963 }
964
965 static struct lu_device *echo_device_free(const struct lu_env *env,
966                                           struct lu_device *d)
967 {
968         struct echo_device     *ed   = cl2echo_dev(lu2cl_dev(d));
969         struct echo_client_obd *ec   = ed->ed_ec;
970         struct echo_object     *eco;
971         struct lu_device       *next = ed->ed_next;
972
973         CDEBUG(D_INFO, "echo device:%p is going to be freed, next = %p\n",
974                ed, next);
975
976         lu_site_purge(env, &ed->ed_site->cs_lu, -1);
977
978         /* check if there are objects still alive.
979          * It shouldn't have any object because lu_site_purge would cleanup
980          * all of cached objects. Anyway, probably the echo device is being
981          * parallelly accessed.
982          */
983         spin_lock(&ec->ec_lock);
984         list_for_each_entry(eco, &ec->ec_objects, eo_obj_chain)
985                 eco->eo_deleted = 1;
986         spin_unlock(&ec->ec_lock);
987
988         /* purge again */
989         lu_site_purge(env, &ed->ed_site->cs_lu, -1);
990
991         CDEBUG(D_INFO,
992                "Waiting for the reference of echo object to be dropped\n");
993
994         /* Wait for the last reference to be dropped. */
995         spin_lock(&ec->ec_lock);
996         while (!list_empty(&ec->ec_objects)) {
997                 spin_unlock(&ec->ec_lock);
998                 CERROR("echo_client still has objects at cleanup time, "
999                        "wait for 1 second\n");
1000                 schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
1001                                                    cfs_time_seconds(1));
1002                 lu_site_purge(env, &ed->ed_site->cs_lu, -1);
1003                 spin_lock(&ec->ec_lock);
1004         }
1005         spin_unlock(&ec->ec_lock);
1006
1007         LASSERT(list_empty(&ec->ec_locks));
1008
1009         CDEBUG(D_INFO, "No object exists, exiting...\n");
1010
1011         echo_client_cleanup(d->ld_obd);
1012         echo_fid_fini(d->ld_obd);
1013         while (next && !ed->ed_next_ismd)
1014                 next = next->ld_type->ldt_ops->ldto_device_free(env, next);
1015
1016         LASSERT(ed->ed_site == lu2cl_site(d->ld_site));
1017         echo_site_fini(env, ed);
1018         cl_device_fini(&ed->ed_cl);
1019         OBD_FREE_PTR(ed);
1020
1021         return NULL;
1022 }
1023
1024 static const struct lu_device_type_operations echo_device_type_ops = {
1025         .ldto_init = echo_type_init,
1026         .ldto_fini = echo_type_fini,
1027
1028         .ldto_start = echo_type_start,
1029         .ldto_stop  = echo_type_stop,
1030
1031         .ldto_device_alloc = echo_device_alloc,
1032         .ldto_device_free  = echo_device_free,
1033         .ldto_device_init  = echo_device_init,
1034         .ldto_device_fini  = echo_device_fini
1035 };
1036
1037 static struct lu_device_type echo_device_type = {
1038         .ldt_tags     = LU_DEVICE_CL,
1039         .ldt_name     = LUSTRE_ECHO_CLIENT_NAME,
1040         .ldt_ops      = &echo_device_type_ops,
1041         .ldt_ctx_tags = LCT_CL_THREAD | LCT_MD_THREAD | LCT_DT_THREAD,
1042 };
1043 /** @} echo_init */
1044
1045 /** \defgroup echo_exports Exported operations
1046  *
1047  * exporting functions to echo client
1048  *
1049  * @{
1050  */
1051
1052 /* Interfaces to echo client obd device */
1053 static struct echo_object *cl_echo_object_find(struct echo_device *d,
1054                                                struct lov_stripe_md **lsmp)
1055 {
1056         struct lu_env *env;
1057         struct echo_thread_info *info;
1058         struct echo_object_conf *conf;
1059         struct lov_stripe_md    *lsm;
1060         struct echo_object *eco;
1061         struct cl_object   *obj;
1062         struct lu_fid *fid;
1063         int refcheck;
1064         int rc;
1065
1066         LASSERT(lsmp);
1067         lsm = *lsmp;
1068         LASSERT(lsm);
1069         LASSERTF(ostid_id(&lsm->lsm_oi) != 0, DOSTID"\n", POSTID(&lsm->lsm_oi));
1070         LASSERTF(ostid_seq(&lsm->lsm_oi) == FID_SEQ_ECHO, DOSTID"\n",
1071                  POSTID(&lsm->lsm_oi));
1072
1073         /* Never return an object if the obd is to be freed. */
1074         if (echo_dev2cl(d)->cd_lu_dev.ld_obd->obd_stopping)
1075                 return ERR_PTR(-ENODEV);
1076
1077         env = cl_env_get(&refcheck);
1078         if (IS_ERR(env))
1079                 return (void *)env;
1080
1081         info = echo_env_info(env);
1082         conf = &info->eti_conf;
1083         if (d->ed_next) {
1084                 if (!d->ed_next_islov) {
1085                         struct lov_oinfo *oinfo = lsm->lsm_oinfo[0];
1086                         LASSERT(oinfo != NULL);
1087                         oinfo->loi_oi = lsm->lsm_oi;
1088                         conf->eoc_cl.u.coc_oinfo = oinfo;
1089                 } else {
1090                         struct lustre_md *md;
1091                         md = &info->eti_md;
1092                         memset(md, 0, sizeof *md);
1093                         md->lsm = lsm;
1094                         conf->eoc_cl.u.coc_md = md;
1095                 }
1096         }
1097         conf->eoc_md = lsmp;
1098
1099         fid  = &info->eti_fid;
1100         rc = ostid_to_fid(fid, &lsm->lsm_oi, 0);
1101         if (rc != 0)
1102                 GOTO(out, eco = ERR_PTR(rc));
1103
1104         /* In the function below, .hs_keycmp resolves to
1105          * lu_obj_hop_keycmp() */
1106         /* coverity[overrun-buffer-val] */
1107         obj = cl_object_find(env, echo_dev2cl(d), fid, &conf->eoc_cl);
1108         if (IS_ERR(obj))
1109                 GOTO(out, eco = (void*)obj);
1110
1111         eco = cl2echo_obj(obj);
1112         if (eco->eo_deleted) {
1113                 cl_object_put(env, obj);
1114                 eco = ERR_PTR(-EAGAIN);
1115         }
1116
1117 out:
1118         cl_env_put(env, &refcheck);
1119         return eco;
1120 }
1121
1122 static int cl_echo_object_put(struct echo_object *eco)
1123 {
1124         struct lu_env *env;
1125         struct cl_object *obj = echo_obj2cl(eco);
1126         int refcheck;
1127
1128         env = cl_env_get(&refcheck);
1129         if (IS_ERR(env))
1130                 return PTR_ERR(env);
1131
1132         /* an external function to kill an object? */
1133         if (eco->eo_deleted) {
1134                 struct lu_object_header *loh = obj->co_lu.lo_header;
1135                 LASSERT(&eco->eo_hdr == luh2coh(loh));
1136                 set_bit(LU_OBJECT_HEARD_BANSHEE, &loh->loh_flags);
1137         }
1138
1139         cl_object_put(env, obj);
1140         cl_env_put(env, &refcheck);
1141         return 0;
1142 }
1143
1144 static int cl_echo_enqueue0(struct lu_env *env, struct echo_object *eco,
1145                             obd_off start, obd_off end, int mode,
1146                             __u64 *cookie , __u32 enqflags)
1147 {
1148         struct cl_io *io;
1149         struct cl_lock *lck;
1150         struct cl_object *obj;
1151         struct cl_lock_descr *descr;
1152         struct echo_thread_info *info;
1153         int rc = -ENOMEM;
1154
1155         info = echo_env_info(env);
1156         io = &info->eti_io;
1157         descr = &info->eti_descr;
1158         obj = echo_obj2cl(eco);
1159
1160         descr->cld_obj   = obj;
1161         descr->cld_start = cl_index(obj, start);
1162         descr->cld_end   = cl_index(obj, end);
1163         descr->cld_mode  = mode == LCK_PW ? CLM_WRITE : CLM_READ;
1164         descr->cld_enq_flags = enqflags;
1165         io->ci_obj = obj;
1166
1167         lck = cl_lock_request(env, io, descr, "ec enqueue", eco);
1168         if (lck) {
1169                 struct echo_client_obd *ec = eco->eo_dev->ed_ec;
1170                 struct echo_lock *el;
1171
1172                 rc = cl_wait(env, lck);
1173                 if (rc == 0) {
1174                         el = cl2echo_lock(cl_lock_at(lck, &echo_device_type));
1175                         spin_lock(&ec->ec_lock);
1176                         if (list_empty(&el->el_chain)) {
1177                                 list_add(&el->el_chain, &ec->ec_locks);
1178                                 el->el_cookie = ++ec->ec_unique;
1179                         }
1180                         atomic_inc(&el->el_refcount);
1181                         *cookie = el->el_cookie;
1182                         spin_unlock(&ec->ec_lock);
1183                 } else {
1184                         cl_lock_release(env, lck, "ec enqueue", current);
1185                 }
1186         }
1187         return rc;
1188 }
1189
1190 static int cl_echo_enqueue(struct echo_object *eco, obd_off start, obd_off end,
1191                            int mode, __u64 *cookie)
1192 {
1193         struct echo_thread_info *info;
1194         struct lu_env *env;
1195         struct cl_io *io;
1196         int refcheck;
1197         int result;
1198
1199         env = cl_env_get(&refcheck);
1200         if (IS_ERR(env))
1201                 return PTR_ERR(env);
1202
1203         info = echo_env_info(env);
1204         io = &info->eti_io;
1205
1206         io->ci_ignore_layout = 1;
1207         result = cl_io_init(env, io, CIT_MISC, echo_obj2cl(eco));
1208         if (result < 0)
1209                 GOTO(out, result);
1210         LASSERT(result == 0);
1211
1212         result = cl_echo_enqueue0(env, eco, start, end, mode, cookie, 0);
1213         cl_io_fini(env, io);
1214
1215 out:
1216         cl_env_put(env, &refcheck);
1217         return result;
1218 }
1219
1220 static int cl_echo_cancel0(struct lu_env *env, struct echo_device *ed,
1221                            __u64 cookie)
1222 {
1223         struct echo_client_obd *ec = ed->ed_ec;
1224         struct echo_lock       *ecl = NULL;
1225         struct list_head             *el;
1226         int found = 0, still_used = 0;
1227
1228         LASSERT(ec != NULL);
1229         spin_lock(&ec->ec_lock);
1230         list_for_each (el, &ec->ec_locks) {
1231                 ecl = list_entry (el, struct echo_lock, el_chain);
1232                 CDEBUG(D_INFO, "ecl: %p, cookie: "LPX64"\n", ecl, ecl->el_cookie);
1233                 found = (ecl->el_cookie == cookie);
1234                 if (found) {
1235                         if (atomic_dec_and_test(&ecl->el_refcount))
1236                                 list_del_init(&ecl->el_chain);
1237                         else
1238                                 still_used = 1;
1239                         break;
1240                 }
1241         }
1242         spin_unlock(&ec->ec_lock);
1243
1244         if (!found)
1245                 return -ENOENT;
1246
1247         echo_lock_release(env, ecl, still_used);
1248         return 0;
1249 }
1250
1251 static int cl_echo_cancel(struct echo_device *ed, __u64 cookie)
1252 {
1253         struct lu_env *env;
1254         int refcheck;
1255         int rc;
1256
1257         env = cl_env_get(&refcheck);
1258         if (IS_ERR(env))
1259                 return PTR_ERR(env);
1260
1261         rc = cl_echo_cancel0(env, ed, cookie);
1262
1263         cl_env_put(env, &refcheck);
1264         return rc;
1265 }
1266
1267 static int cl_echo_async_brw(const struct lu_env *env, struct cl_io *io,
1268                              enum cl_req_type unused, struct cl_2queue *queue)
1269 {
1270         struct cl_page *clp;
1271         struct cl_page *temp;
1272         int result = 0;
1273
1274         cl_page_list_for_each_safe(clp, temp, &queue->c2_qin) {
1275                 int rc;
1276                 rc = cl_page_cache_add(env, io, clp, CRT_WRITE);
1277                 if (rc == 0)
1278                         continue;
1279                 result = result ?: rc;
1280         }
1281         return result;
1282 }
1283
1284 static int cl_echo_object_brw(struct echo_object *eco, int rw, obd_off offset,
1285                               struct page **pages, int npages, int async)
1286 {
1287         struct lu_env      *env;
1288         struct echo_thread_info *info;
1289         struct cl_object        *obj = echo_obj2cl(eco);
1290         struct echo_device      *ed  = eco->eo_dev;
1291         struct cl_2queue        *queue;
1292         struct cl_io        *io;
1293         struct cl_page    *clp;
1294         struct lustre_handle    lh = { 0 };
1295         int page_size = cl_page_size(obj);
1296         int refcheck;
1297         int rc;
1298         int i;
1299
1300         LASSERT((offset & ~CFS_PAGE_MASK) == 0);
1301         LASSERT(ed->ed_next != NULL);
1302         env = cl_env_get(&refcheck);
1303         if (IS_ERR(env))
1304                 return PTR_ERR(env);
1305
1306         info    = echo_env_info(env);
1307         io      = &info->eti_io;
1308         queue   = &info->eti_queue;
1309
1310         cl_2queue_init(queue);
1311
1312         io->ci_ignore_layout = 1;
1313         rc = cl_io_init(env, io, CIT_MISC, obj);
1314         if (rc < 0)
1315                 GOTO(out, rc);
1316         LASSERT(rc == 0);
1317
1318
1319         rc = cl_echo_enqueue0(env, eco, offset,
1320                               offset + npages * PAGE_CACHE_SIZE - 1,
1321                               rw == READ ? LCK_PR : LCK_PW, &lh.cookie,
1322                               CEF_NEVER);
1323         if (rc < 0)
1324                 GOTO(error_lock, rc);
1325
1326         for (i = 0; i < npages; i++) {
1327                 LASSERT(pages[i]);
1328                 clp = cl_page_find(env, obj, cl_index(obj, offset),
1329                                    pages[i], CPT_TRANSIENT);
1330                 if (IS_ERR(clp)) {
1331                         rc = PTR_ERR(clp);
1332                         break;
1333                 }
1334                 LASSERT(clp->cp_type == CPT_TRANSIENT);
1335
1336                 rc = cl_page_own(env, io, clp);
1337                 if (rc) {
1338                         LASSERT(clp->cp_state == CPS_FREEING);
1339                         cl_page_put(env, clp);
1340                         break;
1341                 }
1342
1343                 cl_2queue_add(queue, clp);
1344
1345                 /* drop the reference count for cl_page_find, so that the page
1346                  * will be freed in cl_2queue_fini. */
1347                 cl_page_put(env, clp);
1348                 cl_page_clip(env, clp, 0, page_size);
1349
1350                 offset += page_size;
1351         }
1352
1353         if (rc == 0) {
1354                 enum cl_req_type typ = rw == READ ? CRT_READ : CRT_WRITE;
1355
1356                 async = async && (typ == CRT_WRITE);
1357                 if (async)
1358                         rc = cl_echo_async_brw(env, io, typ, queue);
1359                 else
1360                         rc = cl_io_submit_sync(env, io, typ, queue, 0);
1361                 CDEBUG(D_INFO, "echo_client %s write returns %d\n",
1362                        async ? "async" : "sync", rc);
1363         }
1364
1365         cl_echo_cancel0(env, ed, lh.cookie);
1366 error_lock:
1367         cl_2queue_discard(env, io, queue);
1368         cl_2queue_disown(env, io, queue);
1369         cl_2queue_fini(env, queue);
1370         cl_io_fini(env, io);
1371 out:
1372         cl_env_put(env, &refcheck);
1373         return rc;
1374 }
1375 /** @} echo_exports */
1376
1377
1378 static obd_id last_object_id;
1379
1380 static int
1381 echo_copyout_lsm (struct lov_stripe_md *lsm, void *_ulsm, int ulsm_nob)
1382 {
1383         struct lov_stripe_md *ulsm = _ulsm;
1384         int nob, i;
1385
1386         nob = offsetof (struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]);
1387         if (nob > ulsm_nob)
1388                 return (-EINVAL);
1389
1390         if (copy_to_user (ulsm, lsm, sizeof(*ulsm)))
1391                 return (-EFAULT);
1392
1393         for (i = 0; i < lsm->lsm_stripe_count; i++) {
1394                 if (copy_to_user (ulsm->lsm_oinfo[i], lsm->lsm_oinfo[i],
1395                                       sizeof(lsm->lsm_oinfo[0])))
1396                         return (-EFAULT);
1397         }
1398         return 0;
1399 }
1400
1401 static int
1402 echo_copyin_lsm (struct echo_device *ed, struct lov_stripe_md *lsm,
1403                  void *ulsm, int ulsm_nob)
1404 {
1405         struct echo_client_obd *ec = ed->ed_ec;
1406         int                  i;
1407
1408         if (ulsm_nob < sizeof (*lsm))
1409                 return (-EINVAL);
1410
1411         if (copy_from_user (lsm, ulsm, sizeof (*lsm)))
1412                 return (-EFAULT);
1413
1414         if (lsm->lsm_stripe_count > ec->ec_nstripes ||
1415             lsm->lsm_magic != LOV_MAGIC ||
1416             (lsm->lsm_stripe_size & (~CFS_PAGE_MASK)) != 0 ||
1417             ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL))
1418                 return (-EINVAL);
1419
1420
1421         for (i = 0; i < lsm->lsm_stripe_count; i++) {
1422                 if (copy_from_user(lsm->lsm_oinfo[i],
1423                                        ((struct lov_stripe_md *)ulsm)-> \
1424                                        lsm_oinfo[i],
1425                                        sizeof(lsm->lsm_oinfo[0])))
1426                         return (-EFAULT);
1427         }
1428         return (0);
1429 }
1430
1431 static inline void echo_md_build_name(struct lu_name *lname, char *name,
1432                                       __u64 id)
1433 {
1434         sprintf(name, LPU64, id);
1435         lname->ln_name = name;
1436         lname->ln_namelen = strlen(name);
1437 }
1438
1439 /* similar to mdt_attr_get_complex */
1440 static int echo_big_lmm_get(const struct lu_env *env, struct md_object *o,
1441                             struct md_attr *ma)
1442 {
1443         struct echo_thread_info *info = echo_env_info(env);
1444         int                      rc;
1445
1446         LASSERT(ma->ma_lmm_size > 0);
1447
1448         rc = mo_xattr_get(env, o, &LU_BUF_NULL, XATTR_NAME_LOV);
1449         if (rc < 0)
1450                 return rc;
1451
1452         /* big_lmm may need to be grown */
1453         if (info->eti_big_lmmsize < rc) {
1454                 int size = size_roundup_power2(rc);
1455
1456                 if (info->eti_big_lmmsize > 0) {
1457                         /* free old buffer */
1458                         LASSERT(info->eti_big_lmm);
1459                         OBD_FREE_LARGE(info->eti_big_lmm,
1460                                        info->eti_big_lmmsize);
1461                         info->eti_big_lmm = NULL;
1462                         info->eti_big_lmmsize = 0;
1463                 }
1464
1465                 OBD_ALLOC_LARGE(info->eti_big_lmm, size);
1466                 if (info->eti_big_lmm == NULL)
1467                         return -ENOMEM;
1468                 info->eti_big_lmmsize = size;
1469         }
1470         LASSERT(info->eti_big_lmmsize >= rc);
1471
1472         info->eti_buf.lb_buf = info->eti_big_lmm;
1473         info->eti_buf.lb_len = info->eti_big_lmmsize;
1474         rc = mo_xattr_get(env, o, &info->eti_buf, XATTR_NAME_LOV);
1475         if (rc < 0)
1476                 return rc;
1477
1478         ma->ma_valid |= MA_LOV;
1479         ma->ma_lmm = info->eti_big_lmm;
1480         ma->ma_lmm_size = rc;
1481
1482         return 0;
1483 }
1484
1485 int echo_attr_get_complex(const struct lu_env *env, struct md_object *next,
1486                           struct md_attr *ma)
1487 {
1488         struct echo_thread_info *info = echo_env_info(env);
1489         struct lu_buf           *buf = &info->eti_buf;
1490         umode_t          mode = lu_object_attr(&next->mo_lu);
1491         int                      need = ma->ma_need;
1492         int                      rc = 0, rc2;
1493
1494         ma->ma_valid = 0;
1495
1496         if (need & MA_INODE) {
1497                 ma->ma_need = MA_INODE;
1498                 rc = mo_attr_get(env, next, ma);
1499                 if (rc)
1500                         GOTO(out, rc);
1501                 ma->ma_valid |= MA_INODE;
1502         }
1503
1504         if (need & MA_LOV) {
1505                 if (S_ISREG(mode) || S_ISDIR(mode)) {
1506                         LASSERT(ma->ma_lmm_size > 0);
1507                         buf->lb_buf = ma->ma_lmm;
1508                         buf->lb_len = ma->ma_lmm_size;
1509                         rc2 = mo_xattr_get(env, next, buf, XATTR_NAME_LOV);
1510                         if (rc2 > 0) {
1511                                 ma->ma_lmm_size = rc2;
1512                                 ma->ma_valid |= MA_LOV;
1513                         } else if (rc2 == -ENODATA) {
1514                                 /* no LOV EA */
1515                                 ma->ma_lmm_size = 0;
1516                         } else if (rc2 == -ERANGE) {
1517                                 rc2 = echo_big_lmm_get(env, next, ma);
1518                                 if (rc2 < 0)
1519                                         GOTO(out, rc = rc2);
1520                         } else {
1521                                 GOTO(out, rc = rc2);
1522                         }
1523                 }
1524         }
1525
1526 #ifdef CONFIG_FS_POSIX_ACL
1527         if (need & MA_ACL_DEF && S_ISDIR(mode)) {
1528                 buf->lb_buf = ma->ma_acl;
1529                 buf->lb_len = ma->ma_acl_size;
1530                 rc2 = mo_xattr_get(env, next, buf, XATTR_NAME_ACL_DEFAULT);
1531                 if (rc2 > 0) {
1532                         ma->ma_acl_size = rc2;
1533                         ma->ma_valid |= MA_ACL_DEF;
1534                 } else if (rc2 == -ENODATA) {
1535                         /* no ACLs */
1536                         ma->ma_acl_size = 0;
1537                 } else {
1538                         GOTO(out, rc = rc2);
1539                 }
1540         }
1541 #endif
1542 out:
1543         ma->ma_need = need;
1544         CDEBUG(D_INODE, "after getattr rc = %d, ma_valid = "LPX64" ma_lmm=%p\n",
1545                rc, ma->ma_valid, ma->ma_lmm);
1546         return rc;
1547 }
1548
1549 static int
1550 echo_md_create_internal(const struct lu_env *env, struct echo_device *ed,
1551                         struct md_object *parent, struct lu_fid *fid,
1552                         struct lu_name *lname, struct md_op_spec *spec,
1553                         struct md_attr *ma)
1554 {
1555         struct lu_object        *ec_child, *child;
1556         struct lu_device        *ld = ed->ed_next;
1557         struct echo_thread_info *info = echo_env_info(env);
1558         struct lu_fid           *fid2 = &info->eti_fid2;
1559         struct lu_object_conf    conf = { .loc_flags = LOC_F_NEW };
1560         int                      rc;
1561
1562         rc = mdo_lookup(env, parent, lname, fid2, spec);
1563         if (rc == 0)
1564                 return -EEXIST;
1565         else if (rc != -ENOENT)
1566                 return rc;
1567
1568         ec_child = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev,
1569                                      fid, &conf);
1570         if (IS_ERR(ec_child)) {
1571                 CERROR("Can not find the child "DFID": rc = %ld\n", PFID(fid),
1572                         PTR_ERR(ec_child));
1573                 return PTR_ERR(ec_child);
1574         }
1575
1576         child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1577         if (child == NULL) {
1578                 CERROR("Can not locate the child "DFID"\n", PFID(fid));
1579                 GOTO(out_put, rc = -EINVAL);
1580         }
1581
1582         CDEBUG(D_RPCTRACE, "Start creating object "DFID" %s %p\n",
1583                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1584
1585         /*
1586          * Do not perform lookup sanity check. We know that name does not exist.
1587          */
1588         spec->sp_cr_lookup = 0;
1589         rc = mdo_create(env, parent, lname, lu2md(child), spec, ma);
1590         if (rc) {
1591                 CERROR("Can not create child "DFID": rc = %d\n", PFID(fid), rc);
1592                 GOTO(out_put, rc);
1593         }
1594         CDEBUG(D_RPCTRACE, "End creating object "DFID" %s %p rc  = %d\n",
1595                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent, rc);
1596 out_put:
1597         lu_object_put(env, ec_child);
1598         return rc;
1599 }
1600
1601 static int echo_set_lmm_size(const struct lu_env *env, struct lu_device *ld,
1602                              struct md_attr *ma)
1603 {
1604         struct echo_thread_info *info = echo_env_info(env);
1605
1606         if (strcmp(ld->ld_type->ldt_name, LUSTRE_MDD_NAME)) {
1607                 ma->ma_lmm = (void *)&info->eti_lmm;
1608                 ma->ma_lmm_size = sizeof(info->eti_lmm);
1609         } else {
1610                 LASSERT(info->eti_big_lmmsize);
1611                 ma->ma_lmm = info->eti_big_lmm;
1612                 ma->ma_lmm_size = info->eti_big_lmmsize;
1613         }
1614
1615         return 0;
1616 }
1617
1618 static int echo_create_md_object(const struct lu_env *env,
1619                                  struct echo_device *ed,
1620                                  struct lu_object *ec_parent,
1621                                  struct lu_fid *fid,
1622                                  char *name, int namelen,
1623                                  __u64 id, __u32 mode, int count,
1624                                  int stripe_count, int stripe_offset)
1625 {
1626         struct lu_object        *parent;
1627         struct echo_thread_info *info = echo_env_info(env);
1628         struct lu_name    *lname = &info->eti_lname;
1629         struct md_op_spec       *spec = &info->eti_spec;
1630         struct md_attr    *ma = &info->eti_ma;
1631         struct lu_device        *ld = ed->ed_next;
1632         int                   rc = 0;
1633         int                   i;
1634
1635         if (ec_parent == NULL)
1636                 return -1;
1637         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1638         if (parent == NULL)
1639                 return -ENXIO;
1640
1641         memset(ma, 0, sizeof(*ma));
1642         memset(spec, 0, sizeof(*spec));
1643         if (stripe_count != 0) {
1644                 spec->sp_cr_flags |= FMODE_WRITE;
1645                 echo_set_lmm_size(env, ld, ma);
1646                 if (stripe_count != -1) {
1647                         struct lov_user_md_v3 *lum = &info->eti_lum;
1648
1649                         lum->lmm_magic = LOV_USER_MAGIC_V3;
1650                         lum->lmm_stripe_count = stripe_count;
1651                         lum->lmm_stripe_offset = stripe_offset;
1652                         lum->lmm_pattern = 0;
1653                         spec->u.sp_ea.eadata = lum;
1654                         spec->u.sp_ea.eadatalen = sizeof(*lum);
1655                         spec->sp_cr_flags |= MDS_OPEN_HAS_EA;
1656                 }
1657         }
1658
1659         ma->ma_attr.la_mode = mode;
1660         ma->ma_attr.la_valid = LA_CTIME | LA_MODE;
1661         ma->ma_attr.la_ctime = cfs_time_current_64();
1662
1663         if (name != NULL) {
1664                 lname->ln_name = name;
1665                 lname->ln_namelen = namelen;
1666                 /* If name is specified, only create one object by name */
1667                 rc = echo_md_create_internal(env, ed, lu2md(parent), fid, lname,
1668                                              spec, ma);
1669                 return rc;
1670         }
1671
1672         /* Create multiple object sequenced by id */
1673         for (i = 0; i < count; i++) {
1674                 char *tmp_name = info->eti_name;
1675
1676                 echo_md_build_name(lname, tmp_name, id);
1677
1678                 rc = echo_md_create_internal(env, ed, lu2md(parent), fid, lname,
1679                                              spec, ma);
1680                 if (rc) {
1681                         CERROR("Can not create child %s: rc = %d\n", tmp_name,
1682                                 rc);
1683                         break;
1684                 }
1685                 id++;
1686                 fid->f_oid++;
1687         }
1688
1689         return rc;
1690 }
1691
1692 static struct lu_object *echo_md_lookup(const struct lu_env *env,
1693                                         struct echo_device *ed,
1694                                         struct md_object *parent,
1695                                         struct lu_name *lname)
1696 {
1697         struct echo_thread_info *info = echo_env_info(env);
1698         struct lu_fid      *fid = &info->eti_fid;
1699         struct lu_object        *child;
1700         int    rc;
1701
1702         CDEBUG(D_INFO, "lookup %s in parent "DFID" %p\n", lname->ln_name,
1703                PFID(fid), parent);
1704         rc = mdo_lookup(env, parent, lname, fid, NULL);
1705         if (rc) {
1706                 CERROR("lookup %s: rc = %d\n", lname->ln_name, rc);
1707                 return ERR_PTR(rc);
1708         }
1709
1710         /* In the function below, .hs_keycmp resolves to
1711          * lu_obj_hop_keycmp() */
1712         /* coverity[overrun-buffer-val] */
1713         child = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev, fid, NULL);
1714
1715         return child;
1716 }
1717
1718 static int echo_setattr_object(const struct lu_env *env,
1719                                struct echo_device *ed,
1720                                struct lu_object *ec_parent,
1721                                __u64 id, int count)
1722 {
1723         struct lu_object        *parent;
1724         struct echo_thread_info *info = echo_env_info(env);
1725         struct lu_name    *lname = &info->eti_lname;
1726         char                *name = info->eti_name;
1727         struct lu_device        *ld = ed->ed_next;
1728         struct lu_buf      *buf = &info->eti_buf;
1729         int                   rc = 0;
1730         int                   i;
1731
1732         if (ec_parent == NULL)
1733                 return -1;
1734         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1735         if (parent == NULL)
1736                 return -ENXIO;
1737
1738         for (i = 0; i < count; i++) {
1739                 struct lu_object *ec_child, *child;
1740
1741                 echo_md_build_name(lname, name, id);
1742
1743                 ec_child = echo_md_lookup(env, ed, lu2md(parent), lname);
1744                 if (IS_ERR(ec_child)) {
1745                         CERROR("Can't find child %s: rc = %ld\n",
1746                                 lname->ln_name, PTR_ERR(ec_child));
1747                         return PTR_ERR(ec_child);
1748                 }
1749
1750                 child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1751                 if (child == NULL) {
1752                         CERROR("Can not locate the child %s\n", lname->ln_name);
1753                         lu_object_put(env, ec_child);
1754                         rc = -EINVAL;
1755                         break;
1756                 }
1757
1758                 CDEBUG(D_RPCTRACE, "Start setattr object "DFID"\n",
1759                        PFID(lu_object_fid(child)));
1760
1761                 buf->lb_buf = info->eti_xattr_buf;
1762                 buf->lb_len = sizeof(info->eti_xattr_buf);
1763
1764                 sprintf(name, "%s.test1", XATTR_USER_PREFIX);
1765                 rc = mo_xattr_set(env, lu2md(child), buf, name,
1766                                   LU_XATTR_CREATE);
1767                 if (rc < 0) {
1768                         CERROR("Can not setattr child "DFID": rc = %d\n",
1769                                 PFID(lu_object_fid(child)), rc);
1770                         lu_object_put(env, ec_child);
1771                         break;
1772                 }
1773                 CDEBUG(D_RPCTRACE, "End setattr object "DFID"\n",
1774                        PFID(lu_object_fid(child)));
1775                 id++;
1776                 lu_object_put(env, ec_child);
1777         }
1778         return rc;
1779 }
1780
1781 static int echo_getattr_object(const struct lu_env *env,
1782                                struct echo_device *ed,
1783                                struct lu_object *ec_parent,
1784                                __u64 id, int count)
1785 {
1786         struct lu_object        *parent;
1787         struct echo_thread_info *info = echo_env_info(env);
1788         struct lu_name    *lname = &info->eti_lname;
1789         char                *name = info->eti_name;
1790         struct md_attr    *ma = &info->eti_ma;
1791         struct lu_device        *ld = ed->ed_next;
1792         int                   rc = 0;
1793         int                   i;
1794
1795         if (ec_parent == NULL)
1796                 return -1;
1797         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1798         if (parent == NULL)
1799                 return -ENXIO;
1800
1801         memset(ma, 0, sizeof(*ma));
1802         ma->ma_need |= MA_INODE | MA_LOV | MA_PFID | MA_HSM | MA_ACL_DEF;
1803         ma->ma_acl = info->eti_xattr_buf;
1804         ma->ma_acl_size = sizeof(info->eti_xattr_buf);
1805
1806         for (i = 0; i < count; i++) {
1807                 struct lu_object *ec_child, *child;
1808
1809                 ma->ma_valid = 0;
1810                 echo_md_build_name(lname, name, id);
1811                 echo_set_lmm_size(env, ld, ma);
1812
1813                 ec_child = echo_md_lookup(env, ed, lu2md(parent), lname);
1814                 if (IS_ERR(ec_child)) {
1815                         CERROR("Can't find child %s: rc = %ld\n",
1816                                lname->ln_name, PTR_ERR(ec_child));
1817                         return PTR_ERR(ec_child);
1818                 }
1819
1820                 child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1821                 if (child == NULL) {
1822                         CERROR("Can not locate the child %s\n", lname->ln_name);
1823                         lu_object_put(env, ec_child);
1824                         return -EINVAL;
1825                 }
1826
1827                 CDEBUG(D_RPCTRACE, "Start getattr object "DFID"\n",
1828                        PFID(lu_object_fid(child)));
1829                 rc = echo_attr_get_complex(env, lu2md(child), ma);
1830                 if (rc) {
1831                         CERROR("Can not getattr child "DFID": rc = %d\n",
1832                                 PFID(lu_object_fid(child)), rc);
1833                         lu_object_put(env, ec_child);
1834                         break;
1835                 }
1836                 CDEBUG(D_RPCTRACE, "End getattr object "DFID"\n",
1837                        PFID(lu_object_fid(child)));
1838                 id++;
1839                 lu_object_put(env, ec_child);
1840         }
1841
1842         return rc;
1843 }
1844
1845 static int echo_lookup_object(const struct lu_env *env,
1846                               struct echo_device *ed,
1847                               struct lu_object *ec_parent,
1848                               __u64 id, int count)
1849 {
1850         struct lu_object        *parent;
1851         struct echo_thread_info *info = echo_env_info(env);
1852         struct lu_name    *lname = &info->eti_lname;
1853         char                *name = info->eti_name;
1854         struct lu_fid      *fid = &info->eti_fid;
1855         struct lu_device        *ld = ed->ed_next;
1856         int                   rc = 0;
1857         int                   i;
1858
1859         if (ec_parent == NULL)
1860                 return -1;
1861         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1862         if (parent == NULL)
1863                 return -ENXIO;
1864
1865         /*prepare the requests*/
1866         for (i = 0; i < count; i++) {
1867                 echo_md_build_name(lname, name, id);
1868
1869                 CDEBUG(D_RPCTRACE, "Start lookup object "DFID" %s %p\n",
1870                        PFID(lu_object_fid(parent)), lname->ln_name, parent);
1871
1872                 rc = mdo_lookup(env, lu2md(parent), lname, fid, NULL);
1873                 if (rc) {
1874                         CERROR("Can not lookup child %s: rc = %d\n", name, rc);
1875                         break;
1876                 }
1877                 CDEBUG(D_RPCTRACE, "End lookup object "DFID" %s %p\n",
1878                        PFID(lu_object_fid(parent)), lname->ln_name, parent);
1879
1880                 id++;
1881         }
1882         return rc;
1883 }
1884
1885 static int echo_md_destroy_internal(const struct lu_env *env,
1886                                     struct echo_device *ed,
1887                                     struct md_object *parent,
1888                                     struct lu_name *lname,
1889                                     struct md_attr *ma)
1890 {
1891         struct lu_device   *ld = ed->ed_next;
1892         struct lu_object   *ec_child;
1893         struct lu_object   *child;
1894         int              rc;
1895
1896         ec_child = echo_md_lookup(env, ed, parent, lname);
1897         if (IS_ERR(ec_child)) {
1898                 CERROR("Can't find child %s: rc = %ld\n", lname->ln_name,
1899                         PTR_ERR(ec_child));
1900                 return PTR_ERR(ec_child);
1901         }
1902
1903         child = lu_object_locate(ec_child->lo_header, ld->ld_type);
1904         if (child == NULL) {
1905                 CERROR("Can not locate the child %s\n", lname->ln_name);
1906                 GOTO(out_put, rc = -EINVAL);
1907         }
1908
1909         CDEBUG(D_RPCTRACE, "Start destroy object "DFID" %s %p\n",
1910                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1911
1912         rc = mdo_unlink(env, parent, lu2md(child), lname, ma, 0);
1913         if (rc) {
1914                 CERROR("Can not unlink child %s: rc = %d\n",
1915                         lname->ln_name, rc);
1916                 GOTO(out_put, rc);
1917         }
1918         CDEBUG(D_RPCTRACE, "End destroy object "DFID" %s %p\n",
1919                PFID(lu_object_fid(&parent->mo_lu)), lname->ln_name, parent);
1920 out_put:
1921         lu_object_put(env, ec_child);
1922         return rc;
1923 }
1924
1925 static int echo_destroy_object(const struct lu_env *env,
1926                                struct echo_device *ed,
1927                                struct lu_object *ec_parent,
1928                                char *name, int namelen,
1929                                __u64 id, __u32 mode,
1930                                int count)
1931 {
1932         struct echo_thread_info *info = echo_env_info(env);
1933         struct lu_name    *lname = &info->eti_lname;
1934         struct md_attr    *ma = &info->eti_ma;
1935         struct lu_device        *ld = ed->ed_next;
1936         struct lu_object        *parent;
1937         int                   rc = 0;
1938         int                   i;
1939
1940         parent = lu_object_locate(ec_parent->lo_header, ld->ld_type);
1941         if (parent == NULL)
1942                 return -EINVAL;
1943
1944         memset(ma, 0, sizeof(*ma));
1945         ma->ma_attr.la_mode = mode;
1946         ma->ma_attr.la_valid = LA_CTIME;
1947         ma->ma_attr.la_ctime = cfs_time_current_64();
1948         ma->ma_need = MA_INODE;
1949         ma->ma_valid = 0;
1950
1951         if (name != NULL) {
1952                 lname->ln_name = name;
1953                 lname->ln_namelen = namelen;
1954                 rc = echo_md_destroy_internal(env, ed, lu2md(parent), lname,
1955                                               ma);
1956                 return rc;
1957         }
1958
1959         /*prepare the requests*/
1960         for (i = 0; i < count; i++) {
1961                 char *tmp_name = info->eti_name;
1962
1963                 ma->ma_valid = 0;
1964                 echo_md_build_name(lname, tmp_name, id);
1965
1966                 rc = echo_md_destroy_internal(env, ed, lu2md(parent), lname,
1967                                               ma);
1968                 if (rc) {
1969                         CERROR("Can not unlink child %s: rc = %d\n", name, rc);
1970                         break;
1971                 }
1972                 id++;
1973         }
1974
1975         return rc;
1976 }
1977
1978 static struct lu_object *echo_resolve_path(const struct lu_env *env,
1979                                            struct echo_device *ed, char *path,
1980                                            int path_len)
1981 {
1982         struct lu_device        *ld = ed->ed_next;
1983         struct md_device        *md = lu2md_dev(ld);
1984         struct echo_thread_info *info = echo_env_info(env);
1985         struct lu_fid      *fid = &info->eti_fid;
1986         struct lu_name    *lname = &info->eti_lname;
1987         struct lu_object        *parent = NULL;
1988         struct lu_object        *child = NULL;
1989         int rc = 0;
1990
1991         /*Only support MDD layer right now*/
1992         rc = md->md_ops->mdo_root_get(env, md, fid);
1993         if (rc) {
1994                 CERROR("get root error: rc = %d\n", rc);
1995                 return ERR_PTR(rc);
1996         }
1997
1998         /* In the function below, .hs_keycmp resolves to
1999          * lu_obj_hop_keycmp() */
2000         /* coverity[overrun-buffer-val] */
2001         parent = lu_object_find_at(env, &ed->ed_cl.cd_lu_dev, fid, NULL);
2002         if (IS_ERR(parent)) {
2003                 CERROR("Can not find the parent "DFID": rc = %ld\n",
2004                         PFID(fid), PTR_ERR(parent));
2005                 return parent;
2006         }
2007
2008         while (1) {
2009                 struct lu_object *ld_parent;
2010                 char *e;
2011
2012                 e = strsep(&path, "/");
2013                 if (e == NULL)
2014                         break;
2015
2016                 if (e[0] == 0) {
2017                         if (!path || path[0] == '\0')
2018                                 break;
2019                         continue;
2020                 }
2021
2022                 lname->ln_name = e;
2023                 lname->ln_namelen = strlen(e);
2024
2025                 ld_parent = lu_object_locate(parent->lo_header, ld->ld_type);
2026                 if (ld_parent == NULL) {
2027                         lu_object_put(env, parent);
2028                         rc = -EINVAL;
2029                         break;
2030                 }
2031
2032                 child = echo_md_lookup(env, ed, lu2md(ld_parent), lname);
2033                 lu_object_put(env, parent);
2034                 if (IS_ERR(child)) {
2035                         rc = (int)PTR_ERR(child);
2036                         CERROR("lookup %s under parent "DFID": rc = %d\n",
2037                                 lname->ln_name, PFID(lu_object_fid(ld_parent)),
2038                                 rc);
2039                         break;
2040                 }
2041                 parent = child;
2042         }
2043         if (rc)
2044                 return ERR_PTR(rc);
2045
2046         return parent;
2047 }
2048
2049 static void echo_ucred_init(struct lu_env *env)
2050 {
2051         struct lu_ucred *ucred = lu_ucred(env);
2052
2053         ucred->uc_valid = UCRED_INVALID;
2054
2055         ucred->uc_suppgids[0] = -1;
2056         ucred->uc_suppgids[1] = -1;
2057
2058         ucred->uc_uid   = ucred->uc_o_uid   =
2059                                 from_kuid(&init_user_ns, current_uid());
2060         ucred->uc_gid   = ucred->uc_o_gid   =
2061                                 from_kgid(&init_user_ns, current_gid());
2062         ucred->uc_fsuid = ucred->uc_o_fsuid =
2063                                 from_kuid(&init_user_ns, current_fsuid());
2064         ucred->uc_fsgid = ucred->uc_o_fsgid =
2065                                 from_kgid(&init_user_ns, current_fsgid());
2066         ucred->uc_cap   = cfs_curproc_cap_pack();
2067
2068         /* remove fs privilege for non-root user. */
2069         if (ucred->uc_fsuid)
2070                 ucred->uc_cap &= ~CFS_CAP_FS_MASK;
2071         ucred->uc_valid = UCRED_NEW;
2072 }
2073
2074 static void echo_ucred_fini(struct lu_env *env)
2075 {
2076         struct lu_ucred *ucred = lu_ucred(env);
2077         ucred->uc_valid = UCRED_INIT;
2078 }
2079
2080 #define ECHO_MD_CTX_TAG (LCT_REMEMBER | LCT_MD_THREAD)
2081 #define ECHO_MD_SES_TAG (LCT_REMEMBER | LCT_SESSION)
2082 static int echo_md_handler(struct echo_device *ed, int command,
2083                            char *path, int path_len, __u64 id, int count,
2084                            struct obd_ioctl_data *data)
2085 {
2086         struct echo_thread_info *info;
2087         struct lu_device      *ld = ed->ed_next;
2088         struct lu_env    *env;
2089         int                 refcheck;
2090         struct lu_object      *parent;
2091         char              *name = NULL;
2092         int                 namelen = data->ioc_plen2;
2093         int                 rc = 0;
2094
2095         if (ld == NULL) {
2096                 CERROR("MD echo client is not being initialized properly\n");
2097                 return -EINVAL;
2098         }
2099
2100         if (strcmp(ld->ld_type->ldt_name, LUSTRE_MDD_NAME)) {
2101                 CERROR("Only support MDD layer right now!\n");
2102                 return -EINVAL;
2103         }
2104
2105         env = cl_env_get(&refcheck);
2106         if (IS_ERR(env))
2107                 return PTR_ERR(env);
2108
2109         rc = lu_env_refill_by_tags(env, ECHO_MD_CTX_TAG, ECHO_MD_SES_TAG);
2110         if (rc != 0)
2111                 GOTO(out_env, rc);
2112
2113         /* init big_lmm buffer */
2114         info = echo_env_info(env);
2115         LASSERT(info->eti_big_lmm == NULL);
2116         OBD_ALLOC_LARGE(info->eti_big_lmm, MIN_MD_SIZE);
2117         if (info->eti_big_lmm == NULL)
2118                 GOTO(out_env, rc = -ENOMEM);
2119         info->eti_big_lmmsize = MIN_MD_SIZE;
2120
2121         parent = echo_resolve_path(env, ed, path, path_len);
2122         if (IS_ERR(parent)) {
2123                 CERROR("Can not resolve the path %s: rc = %ld\n", path,
2124                         PTR_ERR(parent));
2125                 GOTO(out_free, rc = PTR_ERR(parent));
2126         }
2127
2128         if (namelen > 0) {
2129                 OBD_ALLOC(name, namelen + 1);
2130                 if (name == NULL)
2131                         GOTO(out_put, rc = -ENOMEM);
2132                 if (copy_from_user(name, data->ioc_pbuf2, namelen))
2133                         GOTO(out_name, rc = -EFAULT);
2134         }
2135
2136         echo_ucred_init(env);
2137
2138         switch (command) {
2139         case ECHO_MD_CREATE:
2140         case ECHO_MD_MKDIR: {
2141                 struct echo_thread_info *info = echo_env_info(env);
2142                 __u32 mode = data->ioc_obdo2.o_mode;
2143                 struct lu_fid *fid = &info->eti_fid;
2144                 int stripe_count = (int)data->ioc_obdo2.o_misc;
2145                 int stripe_index = (int)data->ioc_obdo2.o_stripe_idx;
2146
2147                 rc = ostid_to_fid(fid, &data->ioc_obdo1.o_oi, 0);
2148                 if (rc != 0)
2149                         break;
2150
2151                 /* In the function below, .hs_keycmp resolves to
2152                  * lu_obj_hop_keycmp() */
2153                 /* coverity[overrun-buffer-val] */
2154                 rc = echo_create_md_object(env, ed, parent, fid, name, namelen,
2155                                            id, mode, count, stripe_count,
2156                                            stripe_index);
2157                 break;
2158         }
2159         case ECHO_MD_DESTROY:
2160         case ECHO_MD_RMDIR: {
2161                 __u32 mode = data->ioc_obdo2.o_mode;
2162
2163                 rc = echo_destroy_object(env, ed, parent, name, namelen,
2164                                          id, mode, count);
2165                 break;
2166         }
2167         case ECHO_MD_LOOKUP:
2168                 rc = echo_lookup_object(env, ed, parent, id, count);
2169                 break;
2170         case ECHO_MD_GETATTR:
2171                 rc = echo_getattr_object(env, ed, parent, id, count);
2172                 break;
2173         case ECHO_MD_SETATTR:
2174                 rc = echo_setattr_object(env, ed, parent, id, count);
2175                 break;
2176         default:
2177                 CERROR("unknown command %d\n", command);
2178                 rc = -EINVAL;
2179                 break;
2180         }
2181         echo_ucred_fini(env);
2182
2183 out_name:
2184         if (name != NULL)
2185                 OBD_FREE(name, namelen + 1);
2186 out_put:
2187         lu_object_put(env, parent);
2188 out_free:
2189         LASSERT(info->eti_big_lmm);
2190         OBD_FREE_LARGE(info->eti_big_lmm, info->eti_big_lmmsize);
2191         info->eti_big_lmm = NULL;
2192         info->eti_big_lmmsize = 0;
2193 out_env:
2194         cl_env_put(env, &refcheck);
2195         return rc;
2196 }
2197
2198 static int echo_create_object(const struct lu_env *env, struct echo_device *ed,
2199                               int on_target, struct obdo *oa, void *ulsm,
2200                               int ulsm_nob, struct obd_trans_info *oti)
2201 {
2202         struct echo_object     *eco;
2203         struct echo_client_obd *ec = ed->ed_ec;
2204         struct lov_stripe_md   *lsm = NULL;
2205         int                  rc;
2206         int                  created = 0;
2207
2208         if ((oa->o_valid & OBD_MD_FLID) == 0 && /* no obj id */
2209             (on_target ||                      /* set_stripe */
2210              ec->ec_nstripes != 0)) {      /* LOV */
2211                 CERROR ("No valid oid\n");
2212                 return -EINVAL;
2213         }
2214
2215         rc = echo_alloc_memmd(ed, &lsm);
2216         if (rc < 0) {
2217                 CERROR("Cannot allocate md: rc = %d\n", rc);
2218                 GOTO(failed, rc);
2219         }
2220
2221         if (ulsm != NULL) {
2222                 int i, idx;
2223
2224                 rc = echo_copyin_lsm (ed, lsm, ulsm, ulsm_nob);
2225                 if (rc != 0)
2226                         GOTO(failed, rc);
2227
2228                 if (lsm->lsm_stripe_count == 0)
2229                         lsm->lsm_stripe_count = ec->ec_nstripes;
2230
2231                 if (lsm->lsm_stripe_size == 0)
2232                         lsm->lsm_stripe_size = PAGE_CACHE_SIZE;
2233
2234                 idx = cfs_rand();
2235
2236                 /* setup stripes: indices + default ids if required */
2237                 for (i = 0; i < lsm->lsm_stripe_count; i++) {
2238                         if (ostid_id(&lsm->lsm_oinfo[i]->loi_oi) == 0)
2239                                 lsm->lsm_oinfo[i]->loi_oi = lsm->lsm_oi;
2240
2241                         lsm->lsm_oinfo[i]->loi_ost_idx =
2242                                 (idx + i) % ec->ec_nstripes;
2243                 }
2244         }
2245
2246         /* setup object ID here for !on_target and LOV hint */
2247         if (oa->o_valid & OBD_MD_FLID) {
2248                 LASSERT(oa->o_valid & OBD_MD_FLGROUP);
2249                 lsm->lsm_oi = oa->o_oi;
2250         }
2251
2252         if (ostid_id(&lsm->lsm_oi) == 0)
2253                 ostid_set_id(&lsm->lsm_oi, ++last_object_id);
2254
2255         rc = 0;
2256         if (on_target) {
2257                 /* Only echo objects are allowed to be created */
2258                 LASSERT((oa->o_valid & OBD_MD_FLGROUP) &&
2259                         (ostid_seq(&oa->o_oi) == FID_SEQ_ECHO));
2260                 rc = obd_create(env, ec->ec_exp, oa, &lsm, oti);
2261                 if (rc != 0) {
2262                         CERROR("Cannot create objects: rc = %d\n", rc);
2263                         GOTO(failed, rc);
2264                 }
2265                 created = 1;
2266         }
2267
2268         /* See what object ID we were given */
2269         oa->o_oi = lsm->lsm_oi;
2270         oa->o_valid |= OBD_MD_FLID;
2271
2272         eco = cl_echo_object_find(ed, &lsm);
2273         if (IS_ERR(eco))
2274                 GOTO(failed, rc = PTR_ERR(eco));
2275         cl_echo_object_put(eco);
2276
2277         CDEBUG(D_INFO, "oa oid "DOSTID"\n", POSTID(&oa->o_oi));
2278
2279  failed:
2280         if (created && rc)
2281                 obd_destroy(env, ec->ec_exp, oa, lsm, oti, NULL, NULL);
2282         if (lsm)
2283                 echo_free_memmd(ed, &lsm);
2284         if (rc)
2285                 CERROR("create object failed with: rc = %d\n", rc);
2286         return (rc);
2287 }
2288
2289 static int echo_get_object(struct echo_object **ecop, struct echo_device *ed,
2290                            struct obdo *oa)
2291 {
2292         struct lov_stripe_md   *lsm = NULL;
2293         struct echo_object     *eco;
2294         int                  rc;
2295
2296         if ((oa->o_valid & OBD_MD_FLID) == 0 || ostid_id(&oa->o_oi) == 0) {
2297                 /* disallow use of object id 0 */
2298                 CERROR ("No valid oid\n");
2299                 return -EINVAL;
2300         }
2301
2302         rc = echo_alloc_memmd(ed, &lsm);
2303         if (rc < 0)
2304                 return rc;
2305
2306         lsm->lsm_oi = oa->o_oi;
2307         if (!(oa->o_valid & OBD_MD_FLGROUP))
2308                 ostid_set_seq_echo(&lsm->lsm_oi);
2309
2310         rc = 0;
2311         eco = cl_echo_object_find(ed, &lsm);
2312         if (!IS_ERR(eco))
2313                 *ecop = eco;
2314         else
2315                 rc = PTR_ERR(eco);
2316         if (lsm)
2317                 echo_free_memmd(ed, &lsm);
2318         return rc;
2319 }
2320
/**
 * Release an echo object obtained with echo_get_object().
 *
 * A non-zero result from cl_echo_object_put() is only logged; there is
 * nothing to return to the caller.
 */
static void echo_put_object(struct echo_object *eco)
{
        if (cl_echo_object_put(eco))
                CERROR("echo client: drop an object failed\n");
}
2326
2327 static void
2328 echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp)
2329 {
2330         unsigned long stripe_count;
2331         unsigned long stripe_size;
2332         unsigned long width;
2333         unsigned long woffset;
2334         int        stripe_index;
2335         obd_off       offset;
2336
2337         if (lsm->lsm_stripe_count <= 1)
2338                 return;
2339
2340         offset       = *offp;
2341         stripe_size  = lsm->lsm_stripe_size;
2342         stripe_count = lsm->lsm_stripe_count;
2343
2344         /* width = # bytes in all stripes */
2345         width = stripe_size * stripe_count;
2346
2347         /* woffset = offset within a width; offset = whole number of widths */
2348         woffset = do_div (offset, width);
2349
2350         stripe_index = woffset / stripe_size;
2351
2352         *idp = ostid_id(&lsm->lsm_oinfo[stripe_index]->loi_oi);
2353         *offp = offset * stripe_size + woffset % stripe_size;
2354 }
2355
2356 static void
2357 echo_client_page_debug_setup(struct lov_stripe_md *lsm,
2358                              struct page *page, int rw, obd_id id,
2359                              obd_off offset, obd_off count)
2360 {
2361         char    *addr;
2362         obd_off  stripe_off;
2363         obd_id   stripe_id;
2364         int      delta;
2365
2366         /* no partial pages on the client */
2367         LASSERT(count == PAGE_CACHE_SIZE);
2368
2369         addr = kmap(page);
2370
2371         for (delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
2372                 if (rw == OBD_BRW_WRITE) {
2373                         stripe_off = offset + delta;
2374                         stripe_id = id;
2375                         echo_get_stripe_off_id(lsm, &stripe_off, &stripe_id);
2376                 } else {
2377                         stripe_off = 0xdeadbeef00c0ffeeULL;
2378                         stripe_id = 0xdeadbeef00c0ffeeULL;
2379                 }
2380                 block_debug_setup(addr + delta, OBD_ECHO_BLOCK_SIZE,
2381                                   stripe_off, stripe_id);
2382         }
2383
2384         kunmap(page);
2385 }
2386
2387 static int echo_client_page_debug_check(struct lov_stripe_md *lsm,
2388                                         struct page *page, obd_id id,
2389                                         obd_off offset, obd_off count)
2390 {
2391         obd_off stripe_off;
2392         obd_id  stripe_id;
2393         char   *addr;
2394         int     delta;
2395         int     rc;
2396         int     rc2;
2397
2398         /* no partial pages on the client */
2399         LASSERT(count == PAGE_CACHE_SIZE);
2400
2401         addr = kmap(page);
2402
2403         for (rc = delta = 0; delta < PAGE_CACHE_SIZE; delta += OBD_ECHO_BLOCK_SIZE) {
2404                 stripe_off = offset + delta;
2405                 stripe_id = id;
2406                 echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id);
2407
2408                 rc2 = block_debug_check("test_brw",
2409                                         addr + delta, OBD_ECHO_BLOCK_SIZE,
2410                                         stripe_off, stripe_id);
2411                 if (rc2 != 0) {
2412                         CERROR ("Error in echo object "LPX64"\n", id);
2413                         rc = rc2;
2414                 }
2415         }
2416
2417         kunmap(page);
2418         return rc;
2419 }
2420
2421 static int echo_client_kbrw(struct echo_device *ed, int rw, struct obdo *oa,
2422                             struct echo_object *eco, obd_off offset,
2423                             obd_size count, int async,
2424                             struct obd_trans_info *oti)
2425 {
2426         struct lov_stripe_md   *lsm = eco->eo_lsm;
2427         obd_count              npages;
2428         struct brw_page *pga;
2429         struct brw_page *pgp;
2430         struct page         **pages;
2431         obd_off          off;
2432         int                  i;
2433         int                  rc;
2434         int                  verify;
2435         int                  gfp_mask;
2436         int                  brw_flags = 0;
2437
2438         verify = (ostid_id(&oa->o_oi) != ECHO_PERSISTENT_OBJID &&
2439                   (oa->o_valid & OBD_MD_FLFLAGS) != 0 &&
2440                   (oa->o_flags & OBD_FL_DEBUG_CHECK) != 0);
2441
2442         gfp_mask = ((ostid_id(&oa->o_oi) & 2) == 0) ? GFP_IOFS : GFP_HIGHUSER;
2443
2444         LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ);
2445         LASSERT(lsm != NULL);
2446         LASSERT(ostid_id(&lsm->lsm_oi) == ostid_id(&oa->o_oi));
2447
2448         if (count <= 0 ||
2449             (count & (~CFS_PAGE_MASK)) != 0)
2450                 return -EINVAL;
2451
2452         /* XXX think again with misaligned I/O */
2453         npages = count >> PAGE_CACHE_SHIFT;
2454
2455         if (rw == OBD_BRW_WRITE)
2456                 brw_flags = OBD_BRW_ASYNC;
2457
2458         OBD_ALLOC(pga, npages * sizeof(*pga));
2459         if (pga == NULL)
2460                 return -ENOMEM;
2461
2462         OBD_ALLOC(pages, npages * sizeof(*pages));
2463         if (pages == NULL) {
2464                 OBD_FREE(pga, npages * sizeof(*pga));
2465                 return -ENOMEM;
2466         }
2467
2468         for (i = 0, pgp = pga, off = offset;
2469              i < npages;
2470              i++, pgp++, off += PAGE_CACHE_SIZE) {
2471
2472                 LASSERT (pgp->pg == NULL);      /* for cleanup */
2473
2474                 rc = -ENOMEM;
2475                 OBD_PAGE_ALLOC(pgp->pg, gfp_mask);
2476                 if (pgp->pg == NULL)
2477                         goto out;
2478
2479                 pages[i] = pgp->pg;
2480                 pgp->count = PAGE_CACHE_SIZE;
2481                 pgp->off = off;
2482                 pgp->flag = brw_flags;
2483
2484                 if (verify)
2485                         echo_client_page_debug_setup(lsm, pgp->pg, rw,
2486                                                      ostid_id(&oa->o_oi), off,
2487                                                      pgp->count);
2488         }
2489
2490         /* brw mode can only be used at client */
2491         LASSERT(ed->ed_next != NULL);
2492         rc = cl_echo_object_brw(eco, rw, offset, pages, npages, async);
2493
2494  out:
2495         if (rc != 0 || rw != OBD_BRW_READ)
2496                 verify = 0;
2497
2498         for (i = 0, pgp = pga; i < npages; i++, pgp++) {
2499                 if (pgp->pg == NULL)
2500                         continue;
2501
2502                 if (verify) {
2503                         int vrc;
2504                         vrc = echo_client_page_debug_check(lsm, pgp->pg,
2505                                                            ostid_id(&oa->o_oi),
2506                                                            pgp->off, pgp->count);
2507                         if (vrc != 0 && rc == 0)
2508                                 rc = vrc;
2509                 }
2510                 OBD_PAGE_FREE(pgp->pg);
2511         }
2512         OBD_FREE(pga, npages * sizeof(*pga));
2513         OBD_FREE(pages, npages * sizeof(*pages));
2514         return rc;
2515 }
2516
2517 static int echo_client_prep_commit(const struct lu_env *env,
2518                                    struct obd_export *exp, int rw,
2519                                    struct obdo *oa, struct echo_object *eco,
2520                                    obd_off offset, obd_size count,
2521                                    obd_size batch, struct obd_trans_info *oti,
2522                                    int async)
2523 {
2524         struct lov_stripe_md *lsm = eco->eo_lsm;
2525         struct obd_ioobj ioo;
2526         struct niobuf_local *lnb;
2527         struct niobuf_remote *rnb;
2528         obd_off off;
2529         obd_size npages, tot_pages;
2530         int i, ret = 0, brw_flags = 0;
2531
2532         if (count <= 0 || (count & (~CFS_PAGE_MASK)) != 0 ||
2533             (lsm != NULL && ostid_id(&lsm->lsm_oi) != ostid_id(&oa->o_oi)))
2534                 return -EINVAL;
2535
2536         npages = batch >> PAGE_CACHE_SHIFT;
2537         tot_pages = count >> PAGE_CACHE_SHIFT;
2538
2539         OBD_ALLOC(lnb, npages * sizeof(struct niobuf_local));
2540         OBD_ALLOC(rnb, npages * sizeof(struct niobuf_remote));
2541
2542         if (lnb == NULL || rnb == NULL)
2543                 GOTO(out, ret = -ENOMEM);
2544
2545         if (rw == OBD_BRW_WRITE && async)
2546                 brw_flags |= OBD_BRW_ASYNC;
2547
2548         obdo_to_ioobj(oa, &ioo);
2549
2550         off = offset;
2551
2552         for(; tot_pages; tot_pages -= npages) {
2553                 int lpages;
2554
2555                 if (tot_pages < npages)
2556                         npages = tot_pages;
2557
2558                 for (i = 0; i < npages; i++, off += PAGE_CACHE_SIZE) {
2559                         rnb[i].offset = off;
2560                         rnb[i].len = PAGE_CACHE_SIZE;
2561                         rnb[i].flags = brw_flags;
2562                 }
2563
2564                 ioo.ioo_bufcnt = npages;
2565                 oti->oti_transno = 0;
2566
2567                 lpages = npages;
2568                 ret = obd_preprw(env, rw, exp, oa, 1, &ioo, rnb, &lpages,
2569                                  lnb, oti, NULL);
2570                 if (ret != 0)
2571                         GOTO(out, ret);
2572                 LASSERT(lpages == npages);
2573
2574                 for (i = 0; i < lpages; i++) {
2575                         struct page *page = lnb[i].page;
2576
2577                         /* read past eof? */
2578                         if (page == NULL && lnb[i].rc == 0)
2579                                 continue;
2580
2581                         if (async)
2582                                 lnb[i].flags |= OBD_BRW_ASYNC;
2583
2584                         if (ostid_id(&oa->o_oi) == ECHO_PERSISTENT_OBJID ||
2585                             (oa->o_valid & OBD_MD_FLFLAGS) == 0 ||
2586                             (oa->o_flags & OBD_FL_DEBUG_CHECK) == 0)
2587                                 continue;
2588
2589                         if (rw == OBD_BRW_WRITE)
2590                                 echo_client_page_debug_setup(lsm, page, rw,
2591                                                             ostid_id(&oa->o_oi),
2592                                                              rnb[i].offset,
2593                                                              rnb[i].len);
2594                         else
2595                                 echo_client_page_debug_check(lsm, page,
2596                                                             ostid_id(&oa->o_oi),
2597                                                              rnb[i].offset,
2598                                                              rnb[i].len);
2599                 }
2600
2601                 ret = obd_commitrw(env, rw, exp, oa, 1, &ioo,
2602                                    rnb, npages, lnb, oti, ret);
2603                 if (ret != 0)
2604                         GOTO(out, ret);
2605
2606                 /* Reset oti otherwise it would confuse ldiskfs. */
2607                 memset(oti, 0, sizeof(*oti));
2608
2609                 /* Reuse env context. */
2610                 lu_context_exit((struct lu_context *)&env->le_ctx);
2611                 lu_context_enter((struct lu_context *)&env->le_ctx);
2612         }
2613
2614 out:
2615         if (lnb)
2616                 OBD_FREE(lnb, npages * sizeof(struct niobuf_local));
2617         if (rnb)
2618                 OBD_FREE(rnb, npages * sizeof(struct niobuf_remote));
2619         return ret;
2620 }
2621
2622 static int echo_client_brw_ioctl(const struct lu_env *env, int rw,
2623                                  struct obd_export *exp,
2624                                  struct obd_ioctl_data *data,
2625                                  struct obd_trans_info *dummy_oti)
2626 {
2627         struct obd_device *obd = class_exp2obd(exp);
2628         struct echo_device *ed = obd2echo_dev(obd);
2629         struct echo_client_obd *ec = ed->ed_ec;
2630         struct obdo *oa = &data->ioc_obdo1;
2631         struct echo_object *eco;
2632         int rc;
2633         int async = 1;
2634         long test_mode;
2635
2636         LASSERT(oa->o_valid & OBD_MD_FLGROUP);
2637
2638         rc = echo_get_object(&eco, ed, oa);
2639         if (rc)
2640                 return rc;
2641
2642         oa->o_valid &= ~OBD_MD_FLHANDLE;
2643
2644         /* OFD/obdfilter works only via prep/commit */
2645         test_mode = (long)data->ioc_pbuf1;
2646         if (test_mode == 1)
2647                 async = 0;
2648
2649         if (ed->ed_next == NULL && test_mode != 3) {
2650                 test_mode = 3;
2651                 data->ioc_plen1 = data->ioc_count;
2652         }
2653
2654         /* Truncate batch size to maximum */
2655         if (data->ioc_plen1 > PTLRPC_MAX_BRW_SIZE)
2656                 data->ioc_plen1 = PTLRPC_MAX_BRW_SIZE;
2657
2658         switch (test_mode) {
2659         case 1:
2660                 /* fall through */
2661         case 2:
2662                 rc = echo_client_kbrw(ed, rw, oa,
2663                                       eco, data->ioc_offset,
2664                                       data->ioc_count, async, dummy_oti);
2665                 break;
2666         case 3:
2667                 rc = echo_client_prep_commit(env, ec->ec_exp, rw, oa,
2668                                              eco, data->ioc_offset,
2669                                              data->ioc_count, data->ioc_plen1,
2670                                              dummy_oti, async);
2671                 break;
2672         default:
2673                 rc = -EINVAL;
2674         }
2675         echo_put_object(eco);
2676         return rc;
2677 }
2678
2679 static int
2680 echo_client_enqueue(struct obd_export *exp, struct obdo *oa,
2681                     int mode, obd_off offset, obd_size nob)
2682 {
2683         struct echo_device     *ed = obd2echo_dev(exp->exp_obd);
2684         struct lustre_handle   *ulh = &oa->o_handle;
2685         struct echo_object     *eco;
2686         obd_off          end;
2687         int                  rc;
2688
2689         if (ed->ed_next == NULL)
2690                 return -EOPNOTSUPP;
2691
2692         if (!(mode == LCK_PR || mode == LCK_PW))
2693                 return -EINVAL;
2694
2695         if ((offset & (~CFS_PAGE_MASK)) != 0 ||
2696             (nob & (~CFS_PAGE_MASK)) != 0)
2697                 return -EINVAL;
2698
2699         rc = echo_get_object (&eco, ed, oa);
2700         if (rc != 0)
2701                 return rc;
2702
2703         end = (nob == 0) ? ((obd_off) -1) : (offset + nob - 1);
2704         rc = cl_echo_enqueue(eco, offset, end, mode, &ulh->cookie);
2705         if (rc == 0) {
2706                 oa->o_valid |= OBD_MD_FLHANDLE;
2707                 CDEBUG(D_INFO, "Cookie is "LPX64"\n", ulh->cookie);
2708         }
2709         echo_put_object(eco);
2710         return rc;
2711 }
2712
2713 static int
2714 echo_client_cancel(struct obd_export *exp, struct obdo *oa)
2715 {
2716         struct echo_device *ed     = obd2echo_dev(exp->exp_obd);
2717         __u64          cookie = oa->o_handle.cookie;
2718
2719         if ((oa->o_valid & OBD_MD_FLHANDLE) == 0)
2720                 return -EINVAL;
2721
2722         CDEBUG(D_INFO, "Cookie is "LPX64"\n", cookie);
2723         return cl_echo_cancel(ed, cookie);
2724 }
2725
2726 static int
2727 echo_client_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
2728                       void *karg, void *uarg)
2729 {
2730         struct obd_device      *obd = exp->exp_obd;
2731         struct echo_device     *ed = obd2echo_dev(obd);
2732         struct echo_client_obd *ec = ed->ed_ec;
2733         struct echo_object     *eco;
2734         struct obd_ioctl_data  *data = karg;
2735         struct obd_trans_info   dummy_oti;
2736         struct lu_env     *env;
2737         struct oti_req_ack_lock *ack_lock;
2738         struct obdo         *oa;
2739         struct lu_fid      fid;
2740         int                  rw = OBD_BRW_READ;
2741         int                  rc = 0;
2742         int                  i;
2743
2744         memset(&dummy_oti, 0, sizeof(dummy_oti));
2745
2746         oa = &data->ioc_obdo1;
2747         if (!(oa->o_valid & OBD_MD_FLGROUP)) {
2748                 oa->o_valid |= OBD_MD_FLGROUP;
2749                 ostid_set_seq_echo(&oa->o_oi);
2750         }
2751
2752         /* This FID is unpacked just for validation at this point */
2753         rc = ostid_to_fid(&fid, &oa->o_oi, 0);
2754         if (rc < 0)
2755                 return rc;
2756
2757         OBD_ALLOC_PTR(env);
2758         if (env == NULL)
2759                 return -ENOMEM;
2760
2761         rc = lu_env_init(env, LCT_DT_THREAD);
2762         if (rc)
2763                 GOTO(out, rc = -ENOMEM);
2764
2765         switch (cmd) {
2766         case OBD_IOC_CREATE:                /* may create echo object */
2767                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2768                         GOTO (out, rc = -EPERM);
2769
2770                 rc = echo_create_object(env, ed, 1, oa, data->ioc_pbuf1,
2771                                         data->ioc_plen1, &dummy_oti);
2772                 GOTO(out, rc);
2773
2774         case OBD_IOC_ECHO_MD: {
2775                 int count;
2776                 int cmd;
2777                 char *dir = NULL;
2778                 int dirlen;
2779                 __u64 id;
2780
2781                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2782                         GOTO(out, rc = -EPERM);
2783
2784                 count = data->ioc_count;
2785                 cmd = data->ioc_command;
2786
2787                 id = ostid_id(&data->ioc_obdo2.o_oi);
2788
2789                 dirlen = data->ioc_plen1;
2790                 OBD_ALLOC(dir, dirlen + 1);
2791                 if (dir == NULL)
2792                         GOTO(out, rc = -ENOMEM);
2793
2794                 if (copy_from_user(dir, data->ioc_pbuf1, dirlen)) {
2795                         OBD_FREE(dir, data->ioc_plen1 + 1);
2796                         GOTO(out, rc = -EFAULT);
2797                 }
2798
2799                 rc = echo_md_handler(ed, cmd, dir, dirlen, id, count, data);
2800                 OBD_FREE(dir, dirlen + 1);
2801                 GOTO(out, rc);
2802         }
2803         case OBD_IOC_ECHO_ALLOC_SEQ: {
2804                 struct lu_env   *cl_env;
2805                 int           refcheck;
2806                 __u64       seq;
2807                 int           max_count;
2808
2809                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2810                         GOTO(out, rc = -EPERM);
2811
2812                 cl_env = cl_env_get(&refcheck);
2813                 if (IS_ERR(cl_env))
2814                         GOTO(out, rc = PTR_ERR(cl_env));
2815
2816                 rc = lu_env_refill_by_tags(cl_env, ECHO_MD_CTX_TAG,
2817                                             ECHO_MD_SES_TAG);
2818                 if (rc != 0) {
2819                         cl_env_put(cl_env, &refcheck);
2820                         GOTO(out, rc);
2821                 }
2822
2823                 rc = seq_client_get_seq(cl_env, ed->ed_cl_seq, &seq);
2824                 cl_env_put(cl_env, &refcheck);
2825                 if (rc < 0) {
2826                         CERROR("%s: Can not alloc seq: rc = %d\n",
2827                                obd->obd_name, rc);
2828                         GOTO(out, rc);
2829                 }
2830
2831                 if (copy_to_user(data->ioc_pbuf1, &seq, data->ioc_plen1))
2832                         return -EFAULT;
2833
2834                 max_count = LUSTRE_METADATA_SEQ_MAX_WIDTH;
2835                 if (copy_to_user(data->ioc_pbuf2, &max_count,
2836                                      data->ioc_plen2))
2837                         return -EFAULT;
2838                 GOTO(out, rc);
2839         }
2840         case OBD_IOC_DESTROY:
2841                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2842                         GOTO (out, rc = -EPERM);
2843
2844                 rc = echo_get_object(&eco, ed, oa);
2845                 if (rc == 0) {
2846                         rc = obd_destroy(env, ec->ec_exp, oa, eco->eo_lsm,
2847                                          &dummy_oti, NULL, NULL);
2848                         if (rc == 0)
2849                                 eco->eo_deleted = 1;
2850                         echo_put_object(eco);
2851                 }
2852                 GOTO(out, rc);
2853
2854         case OBD_IOC_GETATTR:
2855                 rc = echo_get_object(&eco, ed, oa);
2856                 if (rc == 0) {
2857                         struct obd_info oinfo = { { { 0 } } };
2858                         oinfo.oi_md = eco->eo_lsm;
2859                         oinfo.oi_oa = oa;
2860                         rc = obd_getattr(env, ec->ec_exp, &oinfo);
2861                         echo_put_object(eco);
2862                 }
2863                 GOTO(out, rc);
2864
2865         case OBD_IOC_SETATTR:
2866                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2867                         GOTO (out, rc = -EPERM);
2868
2869                 rc = echo_get_object(&eco, ed, oa);
2870                 if (rc == 0) {
2871                         struct obd_info oinfo = { { { 0 } } };
2872                         oinfo.oi_oa = oa;
2873                         oinfo.oi_md = eco->eo_lsm;
2874
2875                         rc = obd_setattr(env, ec->ec_exp, &oinfo, NULL);
2876                         echo_put_object(eco);
2877                 }
2878                 GOTO(out, rc);
2879
2880         case OBD_IOC_BRW_WRITE:
2881                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2882                         GOTO (out, rc = -EPERM);
2883
2884                 rw = OBD_BRW_WRITE;
2885                 /* fall through */
2886         case OBD_IOC_BRW_READ:
2887                 rc = echo_client_brw_ioctl(env, rw, exp, data, &dummy_oti);
2888                 GOTO(out, rc);
2889
2890         case ECHO_IOC_GET_STRIPE:
2891                 rc = echo_get_object(&eco, ed, oa);
2892                 if (rc == 0) {
2893                         rc = echo_copyout_lsm(eco->eo_lsm, data->ioc_pbuf1,
2894                                               data->ioc_plen1);
2895                         echo_put_object(eco);
2896                 }
2897                 GOTO(out, rc);
2898
2899         case ECHO_IOC_SET_STRIPE:
2900                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2901                         GOTO (out, rc = -EPERM);
2902
2903                 if (data->ioc_pbuf1 == NULL) {  /* unset */
2904                         rc = echo_get_object(&eco, ed, oa);
2905                         if (rc == 0) {
2906                                 eco->eo_deleted = 1;
2907                                 echo_put_object(eco);
2908                         }
2909                 } else {
2910                         rc = echo_create_object(env, ed, 0, oa,
2911                                                 data->ioc_pbuf1,
2912                                                 data->ioc_plen1, &dummy_oti);
2913                 }
2914                 GOTO (out, rc);
2915
2916         case ECHO_IOC_ENQUEUE:
2917                 if (!cfs_capable(CFS_CAP_SYS_ADMIN))
2918                         GOTO (out, rc = -EPERM);
2919
2920                 rc = echo_client_enqueue(exp, oa,
2921                                          data->ioc_conn1, /* lock mode */
2922                                          data->ioc_offset,
2923                                          data->ioc_count);/*extent*/
2924                 GOTO (out, rc);
2925
2926         case ECHO_IOC_CANCEL:
2927                 rc = echo_client_cancel(exp, oa);
2928                 GOTO (out, rc);
2929
2930         default:
2931                 CERROR ("echo_ioctl(): unrecognised ioctl %#x\n", cmd);
2932                 GOTO (out, rc = -ENOTTY);
2933         }
2934
2935 out:
2936         lu_env_fini(env);
2937         OBD_FREE_PTR(env);
2938
2939         /* XXX this should be in a helper also called by target_send_reply */
2940         for (ack_lock = dummy_oti.oti_ack_locks, i = 0; i < 4;
2941              i++, ack_lock++) {
2942                 if (!ack_lock->mode)
2943                         break;
2944                 ldlm_lock_decref(&ack_lock->lock, ack_lock->mode);
2945         }
2946
2947         return rc;
2948 }
2949
2950 static int echo_client_setup(const struct lu_env *env,
2951                              struct obd_device *obddev, struct lustre_cfg *lcfg)
2952 {
2953         struct echo_client_obd *ec = &obddev->u.echo_client;
2954         struct obd_device *tgt;
2955         struct obd_uuid echo_uuid = { "ECHO_UUID" };
2956         struct obd_connect_data *ocd = NULL;
2957         int rc;
2958
2959         if (lcfg->lcfg_bufcount < 2 || LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) {
2960                 CERROR("requires a TARGET OBD name\n");
2961                 return -EINVAL;
2962         }
2963
2964         tgt = class_name2obd(lustre_cfg_string(lcfg, 1));
2965         if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) {
2966                 CERROR("device not attached or not set up (%s)\n",
2967                        lustre_cfg_string(lcfg, 1));
2968                 return -EINVAL;
2969         }
2970
2971         spin_lock_init(&ec->ec_lock);
2972         INIT_LIST_HEAD (&ec->ec_objects);
2973         INIT_LIST_HEAD (&ec->ec_locks);
2974         ec->ec_unique = 0;
2975         ec->ec_nstripes = 0;
2976
2977         if (!strcmp(tgt->obd_type->typ_name, LUSTRE_MDT_NAME)) {
2978                 lu_context_tags_update(ECHO_MD_CTX_TAG);
2979                 lu_session_tags_update(ECHO_MD_SES_TAG);
2980                 return 0;
2981         }
2982
2983         OBD_ALLOC(ocd, sizeof(*ocd));
2984         if (ocd == NULL) {
2985                 CERROR("Can't alloc ocd connecting to %s\n",
2986                        lustre_cfg_string(lcfg, 1));
2987                 return -ENOMEM;
2988         }
2989
2990         ocd->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL |
2991                                  OBD_CONNECT_BRW_SIZE |
2992                                  OBD_CONNECT_GRANT | OBD_CONNECT_FULL20 |
2993                                  OBD_CONNECT_64BITHASH | OBD_CONNECT_LVB_TYPE |
2994                                  OBD_CONNECT_FID;
2995         ocd->ocd_brw_size = DT_MAX_BRW_SIZE;
2996         ocd->ocd_version = LUSTRE_VERSION_CODE;
2997         ocd->ocd_group = FID_SEQ_ECHO;
2998
2999         rc = obd_connect(env, &ec->ec_exp, tgt, &echo_uuid, ocd, NULL);
3000         if (rc == 0) {
3001                 /* Turn off pinger because it connects to tgt obd directly. */
3002                 spin_lock(&tgt->obd_dev_lock);
3003                 list_del_init(&ec->ec_exp->exp_obd_chain_timed);
3004                 spin_unlock(&tgt->obd_dev_lock);
3005         }
3006
3007         OBD_FREE(ocd, sizeof(*ocd));
3008
3009         if (rc != 0) {
3010                 CERROR("fail to connect to device %s\n",
3011                        lustre_cfg_string(lcfg, 1));
3012                 return (rc);
3013         }
3014
3015         return rc;
3016 }
3017
3018 static int echo_client_cleanup(struct obd_device *obddev)
3019 {
3020         struct echo_device *ed = obd2echo_dev(obddev);
3021         struct echo_client_obd *ec = &obddev->u.echo_client;
3022         int rc;
3023
3024         /*Do nothing for Metadata echo client*/
3025         if (ed == NULL )
3026                 return 0;
3027
3028         if (ed->ed_next_ismd) {
3029                 lu_context_tags_clear(ECHO_MD_CTX_TAG);
3030                 lu_session_tags_clear(ECHO_MD_SES_TAG);
3031                 return 0;
3032         }
3033
3034         if (!list_empty(&obddev->obd_exports)) {
3035                 CERROR("still has clients!\n");
3036                 return -EBUSY;
3037         }
3038
3039         LASSERT(atomic_read(&ec->ec_exp->exp_refcount) > 0);
3040         rc = obd_disconnect(ec->ec_exp);
3041         if (rc != 0)
3042                 CERROR("fail to disconnect device: %d\n", rc);
3043
3044         return rc;
3045 }
3046
3047 static int echo_client_connect(const struct lu_env *env,
3048                                struct obd_export **exp,
3049                                struct obd_device *src, struct obd_uuid *cluuid,
3050                                struct obd_connect_data *data, void *localdata)
3051 {
3052         int             rc;
3053         struct lustre_handle conn = { 0 };
3054
3055         rc = class_connect(&conn, src, cluuid);
3056         if (rc == 0) {
3057                 *exp = class_conn2export(&conn);
3058         }
3059
3060         return rc;
3061 }
3062
3063 static int echo_client_disconnect(struct obd_export *exp)
3064 {
3065 #if 0
3066         struct obd_device      *obd;
3067         struct echo_client_obd *ec;
3068         struct ec_lock   *ecl;
3069 #endif
3070         int                  rc;
3071
3072         if (exp == NULL)
3073                 GOTO(out, rc = -EINVAL);
3074
3075 #if 0
3076         obd = exp->exp_obd;
3077         ec = &obd->u.echo_client;
3078
3079         /* no more contention on export's lock list */
3080         while (!list_empty (&exp->exp_ec_data.eced_locks)) {
3081                 ecl = list_entry (exp->exp_ec_data.eced_locks.next,
3082                                       struct ec_lock, ecl_exp_chain);
3083                 list_del (&ecl->ecl_exp_chain);
3084
3085                 rc = obd_cancel(ec->ec_exp, ecl->ecl_object->eco_lsm,
3086                                  ecl->ecl_mode, &ecl->ecl_lock_handle);
3087
3088                 CDEBUG (D_INFO, "Cancel lock on object "LPX64" on disconnect "
3089                         "(%d)\n", ecl->ecl_object->eco_id, rc);
3090
3091                 echo_put_object (ecl->ecl_object);
3092                 OBD_FREE (ecl, sizeof (*ecl));
3093         }
3094 #endif
3095
3096         rc = class_disconnect(exp);
3097         GOTO(out, rc);
3098  out:
3099         return rc;
3100 }
3101
3102 static struct obd_ops echo_client_obd_ops = {
3103         .o_owner       = THIS_MODULE,
3104
3105 #if 0
3106         .o_setup       = echo_client_setup,
3107         .o_cleanup     = echo_client_cleanup,
3108 #endif
3109
3110         .o_iocontrol   = echo_client_iocontrol,
3111         .o_connect     = echo_client_connect,
3112         .o_disconnect  = echo_client_disconnect
3113 };
3114
3115 int echo_client_init(void)
3116 {
3117         struct lprocfs_static_vars lvars = { 0 };
3118         int rc;
3119
3120         lprocfs_echo_init_vars(&lvars);
3121
3122         rc = lu_kmem_init(echo_caches);
3123         if (rc == 0) {
3124                 rc = class_register_type(&echo_client_obd_ops, NULL,
3125                                          lvars.module_vars,
3126                                          LUSTRE_ECHO_CLIENT_NAME,
3127                                          &echo_device_type);
3128                 if (rc)
3129                         lu_kmem_fini(echo_caches);
3130         }
3131         return rc;
3132 }
3133
3134 void echo_client_exit(void)
3135 {
3136         class_unregister_type(LUSTRE_ECHO_CLIENT_NAME);
3137         lu_kmem_fini(echo_caches);
3138 }
3139
3140 static int __init obdecho_init(void)
3141 {
3142         struct lprocfs_static_vars lvars;
3143         int rc;
3144
3145         LCONSOLE_INFO("Echo OBD driver; http://www.lustre.org/\n");
3146
3147         LASSERT(PAGE_CACHE_SIZE % OBD_ECHO_BLOCK_SIZE == 0);
3148
3149         lprocfs_echo_init_vars(&lvars);
3150
3151
3152         rc = echo_client_init();
3153
3154         return rc;
3155 }
3156
/** Module exit point: shut the echo client down. */
static void /*__exit*/ obdecho_exit(void)
{
	echo_client_exit();
}
3162
3163 MODULE_AUTHOR("Sun Microsystems, Inc. <http://www.lustre.org/>");
3164 MODULE_DESCRIPTION("Lustre Testing Echo OBD driver");
3165 MODULE_LICENSE("GPL");
3166 MODULE_VERSION(LUSTRE_VERSION_STRING);
3167
3168 module_init(obdecho_init);
3169 module_exit(obdecho_exit);
3170
3171 /** @} echo_client */