Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / drivers / staging / lustre / lustre / llite / vvp_dev.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * cl_device and cl_device_type implementation for VVP layer.
33  *
34  *   Author: Nikita Danilov <nikita.danilov@sun.com>
35  *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_LLITE
39
40 #include "../include/obd.h"
41 #include "../include/lustre_lite.h"
42 #include "llite_internal.h"
43 #include "vvp_internal.h"
44
/*****************************************************************************
 *
 * Vvp device and device type functions.
 *
 */

/*
 * vvp_ prefix stands for "Vfs Vm Posix". It corresponds to historical
 * "llite_" (var. "ll_") prefix.
 */

/* Slab caches for the per-thread/per-session/per-object structures below.
 * They are created from vvp_caches[] in vvp_global_init(); the non-static
 * ones are referenced from other VVP source files.
 */
static struct kmem_cache *ll_thread_kmem;
struct kmem_cache *vvp_lock_kmem;
struct kmem_cache *vvp_object_kmem;
struct kmem_cache *vvp_req_kmem;
static struct kmem_cache *vvp_session_kmem;
static struct kmem_cache *vvp_thread_kmem;
62
/* Descriptors for all VVP slab caches; passed to lu_kmem_init()/
 * lu_kmem_fini() by vvp_global_init()/vvp_global_fini().  The array is
 * terminated by the entry with a NULL ckd_cache.
 */
static struct lu_kmem_descr vvp_caches[] = {
	{
		.ckd_cache = &ll_thread_kmem,
		.ckd_name  = "ll_thread_kmem",
		.ckd_size  = sizeof(struct ll_thread_info),
	},
	{
		.ckd_cache = &vvp_lock_kmem,
		.ckd_name  = "vvp_lock_kmem",
		.ckd_size  = sizeof(struct vvp_lock),
	},
	{
		.ckd_cache = &vvp_object_kmem,
		.ckd_name  = "vvp_object_kmem",
		.ckd_size  = sizeof(struct vvp_object),
	},
	{
		.ckd_cache = &vvp_req_kmem,
		.ckd_name  = "vvp_req_kmem",
		.ckd_size  = sizeof(struct vvp_req),
	},
	{
		.ckd_cache = &vvp_session_kmem,
		.ckd_name  = "vvp_session_kmem",
		.ckd_size  = sizeof(struct vvp_session)
	},
	{
		.ckd_cache = &vvp_thread_kmem,
		.ckd_name  = "vvp_thread_kmem",
		.ckd_size  = sizeof(struct vvp_thread_info),
	},
	{
		.ckd_cache = NULL
	}
};
98
99 static void *ll_thread_key_init(const struct lu_context *ctx,
100                                 struct lu_context_key *key)
101 {
102         struct vvp_thread_info *info;
103
104         info = kmem_cache_zalloc(ll_thread_kmem, GFP_NOFS);
105         if (!info)
106                 info = ERR_PTR(-ENOMEM);
107         return info;
108 }
109
110 static void ll_thread_key_fini(const struct lu_context *ctx,
111                                struct lu_context_key *key, void *data)
112 {
113         struct vvp_thread_info *info = data;
114
115         kmem_cache_free(ll_thread_kmem, info);
116 }
117
/* Context key providing per-thread storage from ll_thread_kmem. */
struct lu_context_key ll_thread_key = {
	.lct_tags = LCT_CL_THREAD,
	.lct_init = ll_thread_key_init,
	.lct_fini = ll_thread_key_fini
};
123
124 static void *vvp_session_key_init(const struct lu_context *ctx,
125                                   struct lu_context_key *key)
126 {
127         struct vvp_session *session;
128
129         session = kmem_cache_zalloc(vvp_session_kmem, GFP_NOFS);
130         if (!session)
131                 session = ERR_PTR(-ENOMEM);
132         return session;
133 }
134
135 static void vvp_session_key_fini(const struct lu_context *ctx,
136                                  struct lu_context_key *key, void *data)
137 {
138         struct vvp_session *session = data;
139
140         kmem_cache_free(vvp_session_kmem, session);
141 }
142
/* Context key providing per-session storage from vvp_session_kmem. */
struct lu_context_key vvp_session_key = {
	.lct_tags = LCT_SESSION,
	.lct_init = vvp_session_key_init,
	.lct_fini = vvp_session_key_fini
};
148
149 static void *vvp_thread_key_init(const struct lu_context *ctx,
150                                  struct lu_context_key *key)
151 {
152         struct vvp_thread_info *vti;
153
154         vti = kmem_cache_zalloc(vvp_thread_kmem, GFP_NOFS);
155         if (!vti)
156                 vti = ERR_PTR(-ENOMEM);
157         return vti;
158 }
159
160 static void vvp_thread_key_fini(const struct lu_context *ctx,
161                                 struct lu_context_key *key, void *data)
162 {
163         struct vvp_thread_info *vti = data;
164
165         kmem_cache_free(vvp_thread_kmem, vti);
166 }
167
/* Context key providing per-thread storage from vvp_thread_kmem. */
struct lu_context_key vvp_thread_key = {
	.lct_tags = LCT_CL_THREAD,
	.lct_init = vvp_thread_key_init,
	.lct_fini = vvp_thread_key_fini
};
173
/* type constructor/destructor: vvp_type_{init,fini,start,stop}().
 * The macro generates the helpers named above, registering the three
 * context keys listed here; the helpers are wired into
 * vvp_device_type_ops below.
 */
LU_TYPE_INIT_FINI(vvp, &vvp_thread_key, &ll_thread_key, &vvp_session_key);
176
/* lu_device operations: object allocation is the only hook VVP provides. */
static const struct lu_device_operations vvp_lu_ops = {
	.ldo_object_alloc      = vvp_object_alloc
};
180
/* cl_device operations: request initialization is the only hook VVP
 * provides.
 */
static const struct cl_device_operations vvp_cl_ops = {
	.cdo_req_init = vvp_req_init
};
184
185 static struct lu_device *vvp_device_free(const struct lu_env *env,
186                                          struct lu_device *d)
187 {
188         struct vvp_device *vdv  = lu2vvp_dev(d);
189         struct cl_site    *site = lu2cl_site(d->ld_site);
190         struct lu_device  *next = cl2lu_dev(vdv->vdv_next);
191
192         if (d->ld_site) {
193                 cl_site_fini(site);
194                 kfree(site);
195         }
196         cl_device_fini(lu2cl_dev(d));
197         kfree(vdv);
198         return next;
199 }
200
/**
 * Allocate and minimally initialize a VVP device.
 *
 * Sets up the embedded cl_device, installs the lu/cl operation tables and
 * creates a cl_site for it.  On any failure the partially constructed
 * device is torn down through vvp_device_free() and an ERR_PTR is
 * returned.
 */
static struct lu_device *vvp_device_alloc(const struct lu_env *env,
					  struct lu_device_type *t,
					  struct lustre_cfg *cfg)
{
	struct vvp_device *vdv;
	struct lu_device  *lud;
	struct cl_site    *site;
	int rc;

	vdv = kzalloc(sizeof(*vdv), GFP_NOFS);
	if (!vdv)
		return ERR_PTR(-ENOMEM);

	lud = &vdv->vdv_cl.cd_lu_dev;
	cl_device_init(&vdv->vdv_cl, t);
	vvp2lu_dev(vdv)->ld_ops = &vvp_lu_ops;
	vdv->vdv_cl.cd_ops = &vvp_cl_ops;

	site = kzalloc(sizeof(*site), GFP_NOFS);
	if (site) {
		rc = cl_site_init(site, &vdv->vdv_cl);
		if (rc == 0) {
			rc = lu_site_init_finish(&site->cs_lu);
		} else {
			/* cl_site_init() failed before the site was attached
			 * to the device, so it must be freed here —
			 * vvp_device_free() below only frees an attached
			 * site (d->ld_site non-NULL).
			 */
			LASSERT(!lud->ld_site);
			CERROR("Cannot init lu_site, rc %d.\n", rc);
			kfree(site);
		}
	} else {
		rc = -ENOMEM;
	}
	if (rc != 0) {
		/* Frees vdv and, when attached, the site as well. */
		vvp_device_free(env, lud);
		lud = ERR_PTR(rc);
	}
	return lud;
}
238
239 static int vvp_device_init(const struct lu_env *env, struct lu_device *d,
240                            const char *name, struct lu_device *next)
241 {
242         struct vvp_device  *vdv;
243         int rc;
244
245         vdv = lu2vvp_dev(d);
246         vdv->vdv_next = lu2cl_dev(next);
247
248         LASSERT(d->ld_site && next->ld_type);
249         next->ld_site = d->ld_site;
250         rc = next->ld_type->ldt_ops->ldto_device_init(env, next,
251                                                       next->ld_type->ldt_name,
252                                                       NULL);
253         if (rc == 0) {
254                 lu_device_get(next);
255                 lu_ref_add(&next->ld_reference, "lu-stack", &lu_site_init);
256         }
257         return rc;
258 }
259
260 static struct lu_device *vvp_device_fini(const struct lu_env *env,
261                                          struct lu_device *d)
262 {
263         return cl2lu_dev(lu2vvp_dev(d)->vdv_next);
264 }
265
/* Device-type operations; the vvp_type_* hooks are generated by
 * LU_TYPE_INIT_FINI() above, the device hooks are defined in this file.
 */
static const struct lu_device_type_operations vvp_device_type_ops = {
	.ldto_init = vvp_type_init,
	.ldto_fini = vvp_type_fini,

	.ldto_start = vvp_type_start,
	.ldto_stop  = vvp_type_stop,

	.ldto_device_alloc = vvp_device_alloc,
	.ldto_device_free	= vvp_device_free,
	.ldto_device_init	= vvp_device_init,
	.ldto_device_fini	= vvp_device_fini,
};
278
/* The VVP device type; registered with lu_device_type_init() in
 * vvp_global_init() and used by cl_sb_init() to set up the cl stack.
 */
struct lu_device_type vvp_device_type = {
	.ldt_tags     = LU_DEVICE_CL,
	.ldt_name     = LUSTRE_VVP_NAME,
	.ldt_ops      = &vvp_device_type_ops,
	.ldt_ctx_tags = LCT_CL_THREAD
};
285
/**
 * Module-wide initialization for the VVP layer: create the slab caches
 * described by vvp_caches[] and register vvp_device_type.
 *
 * NOTE(review): the previous comment here ("A mutex serializing calls to
 * vvp_inode_fini() under extreme memory pressure ...") was stale and did
 * not describe this function.
 *
 * \retval 0 on success, negative error code on failure (caches are
 *	   released again if device-type registration fails).
 */
int vvp_global_init(void)
{
	int rc;

	rc = lu_kmem_init(vvp_caches);
	if (rc != 0)
		return rc;

	rc = lu_device_type_init(&vvp_device_type);
	if (rc != 0)
		goto out_kmem;

	return 0;

out_kmem:
	lu_kmem_fini(vvp_caches);

	return rc;
}
309
/* Undo vvp_global_init(): unregister the device type and destroy the
 * slab caches.
 */
void vvp_global_fini(void)
{
	lu_device_type_fini(&vvp_device_type);
	lu_kmem_fini(vvp_caches);
}
315
316 /*****************************************************************************
317  *
318  * mirror obd-devices into cl devices.
319  *
320  */
321
322 int cl_sb_init(struct super_block *sb)
323 {
324         struct ll_sb_info *sbi;
325         struct cl_device  *cl;
326         struct lu_env     *env;
327         int rc = 0;
328         int refcheck;
329
330         sbi  = ll_s2sbi(sb);
331         env = cl_env_get(&refcheck);
332         if (!IS_ERR(env)) {
333                 cl = cl_type_setup(env, NULL, &vvp_device_type,
334                                    sbi->ll_dt_exp->exp_obd->obd_lu_dev);
335                 if (!IS_ERR(cl)) {
336                         cl2vvp_dev(cl)->vdv_sb = sb;
337                         sbi->ll_cl = cl;
338                         sbi->ll_site = cl2lu_dev(cl)->ld_site;
339                 }
340                 cl_env_put(env, &refcheck);
341         } else {
342                 rc = PTR_ERR(env);
343         }
344         return rc;
345 }
346
347 int cl_sb_fini(struct super_block *sb)
348 {
349         struct ll_sb_info *sbi;
350         struct lu_env     *env;
351         struct cl_device  *cld;
352         int             refcheck;
353         int             result;
354
355         sbi = ll_s2sbi(sb);
356         env = cl_env_get(&refcheck);
357         if (!IS_ERR(env)) {
358                 cld = sbi->ll_cl;
359
360                 if (cld) {
361                         cl_stack_fini(env, cld);
362                         sbi->ll_cl = NULL;
363                         sbi->ll_site = NULL;
364                 }
365                 cl_env_put(env, &refcheck);
366                 result = 0;
367         } else {
368                 CERROR("Cannot cleanup cl-stack due to memory shortage.\n");
369                 result = PTR_ERR(env);
370         }
371         /*
372          * If mount failed (sbi->ll_cl == NULL), and this there are no other
373          * mounts, stop device types manually (this usually happens
374          * automatically when last device is destroyed).
375          */
376         lu_types_stop();
377         return result;
378 }
379
380 /****************************************************************************
381  *
382  * debugfs/lustre/llite/$MNT/dump_page_cache
383  *
384  ****************************************************************************/
385
/*
 * To represent the contents of a page cache as a byte stream, the
 * following information is encoded in a 64-bit offset:
 *
 *       - file hash bucket in lu_site::ls_hash[]       28bits
 *
 *       - how far file is from bucket head              4bits
 *
 *       - page index                                   32bits
 *
 * The first two fields identify a file in the cache uniquely.
 */

#define PGC_OBJ_SHIFT (32 + 4)
#define PGC_DEPTH_SHIFT (32)

/* Decoded form of a dump_page_cache seq-file position. */
struct vvp_pgcache_id {
	unsigned		 vpi_bucket;	/* lu_site hash bucket */
	unsigned		 vpi_depth;	/* distance from bucket head */
	uint32_t		 vpi_index;	/* page index in the file */

	unsigned		 vpi_curdep;	/* walk cursor (counts down) */
	struct lu_object_header *vpi_obj;	/* object found by the walk */
};
410
411 static void vvp_pgcache_id_unpack(loff_t pos, struct vvp_pgcache_id *id)
412 {
413         CLASSERT(sizeof(pos) == sizeof(__u64));
414
415         id->vpi_index  = pos & 0xffffffff;
416         id->vpi_depth  = (pos >> PGC_DEPTH_SHIFT) & 0xf;
417         id->vpi_bucket = (unsigned long long)pos >> PGC_OBJ_SHIFT;
418 }
419
420 static loff_t vvp_pgcache_id_pack(struct vvp_pgcache_id *id)
421 {
422         return
423                 ((__u64)id->vpi_index) |
424                 ((__u64)id->vpi_depth  << PGC_DEPTH_SHIFT) |
425                 ((__u64)id->vpi_bucket << PGC_OBJ_SHIFT);
426 }
427
/*
 * cfs_hash_hlist_for_each() callback: skip the first vpi_curdep entries,
 * then take a hash reference on the next non-dying object header and
 * record it in id->vpi_obj.  Returning non-zero stops the walk.
 */
static int vvp_pgcache_obj_get(struct cfs_hash *hs, struct cfs_hash_bd *bd,
			       struct hlist_node *hnode, void *data)
{
	struct vvp_pgcache_id   *id  = data;
	struct lu_object_header *hdr = cfs_hash_object(hs, hnode);

	if (id->vpi_curdep-- > 0)
		return 0; /* continue */

	/* Do not return (or reference) an object being destroyed. */
	if (lu_object_is_dying(hdr))
		return 1;

	cfs_hash_get(hs, hnode);
	id->vpi_obj = hdr;
	return 1;
}
444
/**
 * Locate the cl_object identified by @id on @dev's site.
 *
 * Walks hash bucket @id->vpi_bucket, skipping vpi_depth entries (via
 * vvp_pgcache_obj_get(), which counts vpi_curdep down).  On success a
 * reference is held on the found object and the layer belonging to @dev
 * is returned; NULL otherwise.  When the bucket is exhausted before the
 * requested depth, vpi_depth is set to 0xf so the caller's depth
 * increment wraps it to 0 and moves on to the next bucket.
 */
static struct cl_object *vvp_pgcache_obj(const struct lu_env *env,
					 struct lu_device *dev,
					 struct vvp_pgcache_id *id)
{
	LASSERT(lu_device_is_cl(dev));

	id->vpi_depth &= 0xf;
	id->vpi_obj    = NULL;
	id->vpi_curdep = id->vpi_depth;

	cfs_hash_hlist_for_each(dev->ld_site->ls_obj_hash, id->vpi_bucket,
				vvp_pgcache_obj_get, id);
	if (id->vpi_obj) {
		struct lu_object *lu_obj;

		lu_obj = lu_object_locate(id->vpi_obj, dev->ld_type);
		if (lu_obj) {
			lu_object_ref_add(lu_obj, "dump", current);
			return lu2cl(lu_obj);
		}
		/* No layer of our type in this object: drop the reference
		 * taken by vvp_pgcache_obj_get().
		 */
		lu_object_put(env, lu_object_top(id->vpi_obj));

	} else if (id->vpi_curdep > 0) {
		/* Bucket ran out before the requested depth was reached. */
		id->vpi_depth = 0xf;
	}
	return NULL;
}
472
/**
 * Advance the dump_page_cache iterator: starting from encoded position
 * @pos, find the next (object, page) pair that is actually present in
 * the page cache and return its packed position, or ~0ULL when the whole
 * site has been scanned.
 */
static loff_t vvp_pgcache_find(const struct lu_env *env,
			       struct lu_device *dev, loff_t pos)
{
	struct cl_object     *clob;
	struct lu_site       *site;
	struct vvp_pgcache_id id;

	site = dev->ld_site;
	vvp_pgcache_id_unpack(pos, &id);

	while (1) {
		if (id.vpi_bucket >= CFS_HASH_NHLIST(site->ls_obj_hash))
			return ~0ULL;
		clob = vvp_pgcache_obj(env, dev, &id);
		if (clob) {
			struct inode *inode = vvp_object_inode(clob);
			struct page *vmpage;
			int nr;

			/* First cached page at or after vpi_index. */
			nr = find_get_pages_contig(inode->i_mapping,
						   id.vpi_index, 1, &vmpage);
			if (nr > 0) {
				id.vpi_index = vmpage->index;
				/* Cant support over 16T file */
				nr = !(vmpage->index > 0xffffffff);
				put_page(vmpage);
			}

			lu_object_ref_del(&clob->co_lu, "dump", current);
			cl_object_put(env, clob);
			if (nr > 0)
				return vvp_pgcache_id_pack(&id);
		}
		/* to the next object: bump depth, wrap to next bucket when
		 * the 4-bit depth overflows; ~0ULL if the bucket counter
		 * itself wraps.
		 */
		++id.vpi_depth;
		id.vpi_depth &= 0xf;
		if (id.vpi_depth == 0 && ++id.vpi_bucket == 0)
			return ~0ULL;
		id.vpi_index = 0;
	}
}
514
/*
 * If @page has PG_<flag> set, append "<flag>" to @seq ("|<flag>" when a
 * previous flag was already printed) and record that via @has_flags.
 */
#define seq_page_flag(seq, page, flag, has_flags) do {		  \
	if (test_bit(PG_##flag, &(page)->flags)) {		  \
		seq_printf(seq, "%s"#flag, has_flags ? "|" : "");       \
		has_flags = 1;					    \
	}							       \
} while (0)
521
/*
 * Print one dump_page_cache line describing @page: its VVP page state,
 * the owning inode's FID, the page index/refcount and the VM page flags.
 */
static void vvp_pgcache_page_show(const struct lu_env *env,
				  struct seq_file *seq, struct cl_page *page)
{
	struct vvp_page *vpg;
	struct page      *vmpage;
	int           has_flags;

	vpg = cl2vvp_page(cl_page_at(page, &vvp_device_type));
	vmpage = vpg->vpg_page;
	seq_printf(seq, " %5i | %p %p %s %s %s %s | %p "DFID"(%p) %lu %u [",
		   0 /* gen */,
		   vpg, page,
		   "none",
		   vpg->vpg_write_queued ? "wq" : "- ",
		   vpg->vpg_defer_uptodate ? "du" : "- ",
		   PageWriteback(vmpage) ? "wb" : "-",
		   vmpage, PFID(ll_inode2fid(vmpage->mapping->host)),
		   vmpage->mapping->host, vmpage->index,
		   page_count(vmpage));
	has_flags = 0;
	seq_page_flag(seq, vmpage, locked, has_flags);
	seq_page_flag(seq, vmpage, error, has_flags);
	seq_page_flag(seq, vmpage, referenced, has_flags);
	seq_page_flag(seq, vmpage, uptodate, has_flags);
	seq_page_flag(seq, vmpage, dirty, has_flags);
	seq_page_flag(seq, vmpage, writeback, has_flags);
	seq_printf(seq, "%s]\n", has_flags ? "" : "-");
}
550
/*
 * seq_file .show hook for dump_page_cache: decode the position, look up
 * the object and page it names, and print one line about the page (or
 * "missing" when the object or page is gone).
 */
static int vvp_pgcache_show(struct seq_file *f, void *v)
{
	loff_t		   pos;
	struct ll_sb_info       *sbi;
	struct cl_object	*clob;
	struct lu_env      *env;
	struct vvp_pgcache_id    id;
	int		      refcheck;
	int		      result;

	env = cl_env_get(&refcheck);
	if (!IS_ERR(env)) {
		pos = *(loff_t *)v;
		vvp_pgcache_id_unpack(pos, &id);
		sbi = f->private;
		clob = vvp_pgcache_obj(env, &sbi->ll_cl->cd_lu_dev, &id);
		if (clob) {
			struct inode *inode = vvp_object_inode(clob);
			struct cl_page *page = NULL;
			struct page *vmpage;

			result = find_get_pages_contig(inode->i_mapping,
						       id.vpi_index, 1,
						       &vmpage);
			if (result > 0) {
				/* Page lock held across cl_vmpage_page(). */
				lock_page(vmpage);
				page = cl_vmpage_page(vmpage, clob);
				unlock_page(vmpage);
				put_page(vmpage);
			}

			seq_printf(f, "%8x@" DFID ": ", id.vpi_index,
				   PFID(lu_object_fid(&clob->co_lu)));
			if (page) {
				vvp_pgcache_page_show(env, f, page);
				cl_page_put(env, page);
			} else {
				seq_puts(f, "missing\n");
			}
			lu_object_ref_del(&clob->co_lu, "dump", current);
			cl_object_put(env, clob);
		} else {
			seq_printf(f, "%llx missing\n", pos);
		}
		cl_env_put(env, &refcheck);
		result = 0;
	} else {
		result = PTR_ERR(env);
	}
	return result;
}
602
603 static void *vvp_pgcache_start(struct seq_file *f, loff_t *pos)
604 {
605         struct ll_sb_info *sbi;
606         struct lu_env     *env;
607         int             refcheck;
608
609         sbi = f->private;
610
611         env = cl_env_get(&refcheck);
612         if (!IS_ERR(env)) {
613                 sbi = f->private;
614                 if (sbi->ll_site->ls_obj_hash->hs_cur_bits > 64 - PGC_OBJ_SHIFT)
615                         pos = ERR_PTR(-EFBIG);
616                 else {
617                         *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev,
618                                                 *pos);
619                         if (*pos == ~0ULL)
620                                 pos = NULL;
621                 }
622                 cl_env_put(env, &refcheck);
623         }
624         return pos;
625 }
626
627 static void *vvp_pgcache_next(struct seq_file *f, void *v, loff_t *pos)
628 {
629         struct ll_sb_info *sbi;
630         struct lu_env     *env;
631         int             refcheck;
632
633         env = cl_env_get(&refcheck);
634         if (!IS_ERR(env)) {
635                 sbi = f->private;
636                 *pos = vvp_pgcache_find(env, &sbi->ll_cl->cd_lu_dev, *pos + 1);
637                 if (*pos == ~0ULL)
638                         pos = NULL;
639                 cl_env_put(env, &refcheck);
640         }
641         return pos;
642 }
643
/* seq_file .stop hook; no per-iteration state to release. */
static void vvp_pgcache_stop(struct seq_file *f, void *v)
{
	/* Nothing to do */
}
648
/* seq_file iterator for the dump_page_cache debugfs file. */
static const struct seq_operations vvp_pgcache_ops = {
	.start = vvp_pgcache_start,
	.next  = vvp_pgcache_next,
	.stop  = vvp_pgcache_stop,
	.show  = vvp_pgcache_show
};
655
656 static int vvp_dump_pgcache_seq_open(struct inode *inode, struct file *filp)
657 {
658         struct seq_file *seq;
659         int rc;
660
661         rc = seq_open(filp, &vvp_pgcache_ops);
662         if (rc)
663                 return rc;
664
665         seq = filp->private_data;
666         seq->private = inode->i_private;
667
668         return 0;
669 }
670
/* file_operations for dump_page_cache; read/llseek/release are the
 * standard seq_file helpers.
 */
const struct file_operations vvp_dump_pgcache_file_ops = {
	.owner   = THIS_MODULE,
	.open    = vvp_dump_pgcache_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};