1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * Client Lustre Page.
33  *
34  *   Author: Nikita Danilov <nikita.danilov@sun.com>
35  *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_CLASS
39
40 #include "../../include/linux/libcfs/libcfs.h"
41 #include "../include/obd_class.h"
42 #include "../include/obd_support.h"
43 #include <linux/list.h>
44
45 #include "../include/cl_object.h"
46 #include "cl_internal.h"
47
48 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);
49
50 # define PASSERT(env, page, expr)                                          \
51         do {                                                               \
52                 if (unlikely(!(expr))) {                                   \
53                         CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
54                         LASSERT(0);                                        \
55                 }                                                          \
56         } while (0)
57
58 # define PINVRNT(env, page, exp) \
59         ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
60
61 /**
62  * Internal version of cl_page_get().
63  *
64  * This function can be used to obtain an initial reference to a previously
65  * unreferenced cached object. It can be called only if concurrent page
66  * reclamation is somehow prevented, e.g., by keeping a lock on the VM page
67  * associated with \a page.
68  *
69  * Use with care! Not exported.
70  */
71 static void cl_page_get_trust(struct cl_page *page)
72 {
73         LASSERT(atomic_read(&page->cp_ref) > 0);
74         atomic_inc(&page->cp_ref);
75 }
76
77 /**
78  * Returns a slice within a page, corresponding to the given layer in the
79  * device stack.
80  *
81  * \see cl_lock_at()
82  */
83 static const struct cl_page_slice *
84 cl_page_at_trusted(const struct cl_page *page,
85                    const struct lu_device_type *dtype)
86 {
87         const struct cl_page_slice *slice;
88
89         list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
90                 if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
91                         return slice;
92         }
93         return NULL;
94 }
95
96 static void cl_page_free(const struct lu_env *env, struct cl_page *page)
97 {
98         struct cl_object *obj  = page->cp_obj;
99
100         PASSERT(env, page, list_empty(&page->cp_batch));
101         PASSERT(env, page, !page->cp_owner);
102         PASSERT(env, page, !page->cp_req);
103         PASSERT(env, page, page->cp_state == CPS_FREEING);
104
105         while (!list_empty(&page->cp_layers)) {
106                 struct cl_page_slice *slice;
107
108                 slice = list_entry(page->cp_layers.next,
109                                    struct cl_page_slice, cpl_linkage);
110                 list_del_init(page->cp_layers.next);
111                 if (unlikely(slice->cpl_ops->cpo_fini))
112                         slice->cpl_ops->cpo_fini(env, slice);
113         }
114         lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
115         cl_object_put(env, obj);
116         lu_ref_fini(&page->cp_reference);
117         kfree(page);
118 }
119
120 /**
121  * Helper function for updating the page state. This is the only place in the
122  * code where the cl_page::cp_state field is mutated.
123  */
124 static inline void cl_page_state_set_trust(struct cl_page *page,
125                                            enum cl_page_state state)
126 {
127         /* bypass const. */
128         *(enum cl_page_state *)&page->cp_state = state;
129 }
130
131 struct cl_page *cl_page_alloc(const struct lu_env *env,
132                               struct cl_object *o, pgoff_t ind,
133                               struct page *vmpage,
134                               enum cl_page_type type)
135 {
136         struct cl_page    *page;
137         struct lu_object_header *head;
138
139         page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
140         if (page) {
141                 int result = 0;
142
143                 atomic_set(&page->cp_ref, 1);
144                 page->cp_obj = o;
145                 cl_object_get(o);
146                 lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
147                                      page);
148                 page->cp_vmpage = vmpage;
149                 cl_page_state_set_trust(page, CPS_CACHED);
150                 page->cp_type = type;
151                 INIT_LIST_HEAD(&page->cp_layers);
152                 INIT_LIST_HEAD(&page->cp_batch);
153                 INIT_LIST_HEAD(&page->cp_flight);
154                 mutex_init(&page->cp_mutex);
155                 lu_ref_init(&page->cp_reference);
156                 head = o->co_lu.lo_header;
157                 list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
158                         if (o->co_ops->coo_page_init) {
159                                 result = o->co_ops->coo_page_init(env, o, page,
160                                                                   ind);
161                                 if (result != 0) {
162                                         cl_page_delete0(env, page);
163                                         cl_page_free(env, page);
164                                         page = ERR_PTR(result);
165                                         break;
166                                 }
167                         }
168                 }
169         } else {
170                 page = ERR_PTR(-ENOMEM);
171         }
172         return page;
173 }
174 EXPORT_SYMBOL(cl_page_alloc);
175
176 /**
177  * Returns a cl_page with index \a idx within object \a o, associated with
178  * the VM page \a vmpage.
179  *
180  * This is the main entry point into the cl_page caching interface. First, the
181  * cache is consulted: for a CPT_CACHEABLE page, the cl_page already attached
182  * to \a vmpage is looked up and, if found, returned immediately. Otherwise a
183  * new page is allocated and returned. Either way, an extra reference is taken.
184  *
185  * \see cl_object_find(), cl_lock_find()
186  */
187 struct cl_page *cl_page_find(const struct lu_env *env,
188                              struct cl_object *o,
189                              pgoff_t idx, struct page *vmpage,
190                              enum cl_page_type type)
191 {
192         struct cl_page    *page = NULL;
193         struct cl_object_header *hdr;
194
195         LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
196         might_sleep();
197
198         hdr = cl_object_header(o);
199
200         CDEBUG(D_PAGE, "%lu@"DFID" %p %lx %d\n",
201                idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
202         /* fast path. */
203         if (type == CPT_CACHEABLE) {
204                 /*
205                  * vmpage lock is used to protect the child/parent
206                  * relationship
207                  */
208                 KLASSERT(PageLocked(vmpage));
209                 /*
210                  * cl_vmpage_page() can be called here without any locks as
211                  *
212                  *     - "vmpage" is locked (which prevents ->private from
213                  *       concurrent updates), and
214                  *
215                  *     - "o" cannot be destroyed while current thread holds a
216                  *       reference on it.
217                  */
218                 page = cl_vmpage_page(vmpage, o);
219
220                 if (page)
221                         return page;
222         }
223
224         /* allocate and initialize cl_page */
225         page = cl_page_alloc(env, o, idx, vmpage, type);
226         return page;
227 }
228 EXPORT_SYMBOL(cl_page_find);
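
/*
 * Usage sketch (illustrative only, not part of this file): how a caller that
 * already holds a locked VM page and a reference on the cl_object might look
 * up or allocate the corresponding cl_page.  The env/object setup is assumed
 * to come from the caller's context; get_clpage() is a hypothetical name.
 *
 *	static struct cl_page *get_clpage(const struct lu_env *env,
 *					  struct cl_object *obj,
 *					  struct page *vmpage, pgoff_t index)
 *	{
 *		struct cl_page *page;
 *
 *		page = cl_page_find(env, obj, index, vmpage, CPT_CACHEABLE);
 *		if (IS_ERR(page))
 *			return page;
 *		return page;
 *	}
 *
 * The vmpage must be locked for the CPT_CACHEABLE fast path, and the returned
 * reference is dropped with cl_page_put().
 */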
229
230 static inline int cl_page_invariant(const struct cl_page *pg)
231 {
232         /*
233          * Page invariant is protected by a VM lock.
234          */
235         LINVRNT(cl_page_is_vmlocked(NULL, pg));
236
237         return cl_page_in_use_noref(pg);
238 }
239
240 static void cl_page_state_set0(const struct lu_env *env,
241                                struct cl_page *page, enum cl_page_state state)
242 {
243         enum cl_page_state old;
244
245         /*
246          * Matrix of allowed state transitions [old][new], for sanity
247          * checking.
248          */
249         static const int allowed_transitions[CPS_NR][CPS_NR] = {
250                 [CPS_CACHED] = {
251                         [CPS_CACHED]  = 0,
252                         [CPS_OWNED]   = 1, /* io finds existing cached page */
253                         [CPS_PAGEIN]  = 0,
254                         [CPS_PAGEOUT] = 1, /* write-out from the cache */
255                         [CPS_FREEING] = 1, /* eviction under memory pressure */
256                 },
257                 [CPS_OWNED] = {
258                         [CPS_CACHED]  = 1, /* release to the cache */
259                         [CPS_OWNED]   = 0,
260                         [CPS_PAGEIN]  = 1, /* start read immediately */
261                         [CPS_PAGEOUT] = 1, /* start write immediately */
262                         [CPS_FREEING] = 1, /* lock invalidation or truncate */
263                 },
264                 [CPS_PAGEIN] = {
265                         [CPS_CACHED]  = 1, /* io completion */
266                         [CPS_OWNED]   = 0,
267                         [CPS_PAGEIN]  = 0,
268                         [CPS_PAGEOUT] = 0,
269                         [CPS_FREEING] = 0,
270                 },
271                 [CPS_PAGEOUT] = {
272                         [CPS_CACHED]  = 1, /* io completion */
273                         [CPS_OWNED]   = 0,
274                         [CPS_PAGEIN]  = 0,
275                         [CPS_PAGEOUT] = 0,
276                         [CPS_FREEING] = 0,
277                 },
278                 [CPS_FREEING] = {
279                         [CPS_CACHED]  = 0,
280                         [CPS_OWNED]   = 0,
281                         [CPS_PAGEIN]  = 0,
282                         [CPS_PAGEOUT] = 0,
283                         [CPS_FREEING] = 0,
284                 }
285         };
286
287         old = page->cp_state;
288         PASSERT(env, page, allowed_transitions[old][state]);
289         CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
290         PASSERT(env, page, page->cp_state == old);
291         PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
292         cl_page_state_set_trust(page, state);
293 }
294
295 static void cl_page_state_set(const struct lu_env *env,
296                               struct cl_page *page, enum cl_page_state state)
297 {
298         cl_page_state_set0(env, page, state);
299 }
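
/*
 * Lifecycle sketch (illustrative): one legal path through the transition
 * matrix above, together with the calls that drive each step:
 *
 *	CPS_CACHED  --cl_page_own()----------->  CPS_OWNED
 *	CPS_OWNED   --cl_page_prep(CRT_WRITE)->  CPS_PAGEOUT
 *	CPS_PAGEOUT --cl_page_completion()---->  CPS_CACHED
 *	CPS_CACHED  --cl_page_delete()-------->  CPS_FREEING
 */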
300
301 /**
302  * Acquires an additional reference to a page.
303  *
304  * This can be called only by a caller already possessing a reference to
305  * \a page.
306  *
307  * \see cl_object_get(), cl_lock_get().
308  */
309 void cl_page_get(struct cl_page *page)
310 {
311         cl_page_get_trust(page);
312 }
313 EXPORT_SYMBOL(cl_page_get);
314
315 /**
316  * Releases a reference to a page.
317  *
318  * When the last reference is released, the page is returned to the cache,
319  * unless it is in cl_page_state::CPS_FREEING state, in which case it is
320  * immediately destroyed.
321  *
322  * \see cl_object_put(), cl_lock_put().
323  */
324 void cl_page_put(const struct lu_env *env, struct cl_page *page)
325 {
326         CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
327                        atomic_read(&page->cp_ref));
328
329         if (atomic_dec_and_test(&page->cp_ref)) {
330                 LASSERT(page->cp_state == CPS_FREEING);
331
332                 LASSERT(atomic_read(&page->cp_ref) == 0);
333                 PASSERT(env, page, !page->cp_owner);
334                 PASSERT(env, page, list_empty(&page->cp_batch));
335                 /*
336                  * Page is no longer reachable by other threads. Tear
337                  * it down.
338                  */
339                 cl_page_free(env, page);
340         }
341 }
342 EXPORT_SYMBOL(cl_page_put);
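
/*
 * Reference-counting sketch (illustrative, caller-supplied env assumed):
 * every cl_page_get() must be balanced by a cl_page_put(); the final put on
 * a page in CPS_FREEING tears it down via cl_page_free().
 *
 *	cl_page_get(page);
 *	... use the page ...
 *	cl_page_put(env, page);
 */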
343
344 /**
345  * Returns the cl_page associated with a VM page and the given cl_object.
346  */
347 struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
348 {
349         struct cl_page *page;
350
351         KLASSERT(PageLocked(vmpage));
352
353         /*
354          * NOTE: absence of races and liveness of data are guaranteed by the
355          *       page lock on the "vmpage". That works because object
356          *       destruction proceeds bottom-to-top.
357          */
358
359         page = (struct cl_page *)vmpage->private;
360         if (page) {
361                 cl_page_get_trust(page);
362                 LASSERT(page->cp_type == CPT_CACHEABLE);
363         }
364         return page;
365 }
366 EXPORT_SYMBOL(cl_vmpage_page);
367
368 const struct cl_page_slice *cl_page_at(const struct cl_page *page,
369                                        const struct lu_device_type *dtype)
370 {
371         return cl_page_at_trusted(page, dtype);
372 }
373 EXPORT_SYMBOL(cl_page_at);
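
/*
 * Slice lookup sketch (illustrative): a layer can locate its own per-page
 * state with cl_page_at().  "my_device_type", "my_page_slice" and "mps_cl"
 * are hypothetical names for a layer's lu_device_type and slice wrapper.
 *
 *	const struct cl_page_slice *slice;
 *	struct my_page_slice *mps;
 *
 *	slice = cl_page_at(page, &my_device_type);
 *	if (slice)
 *		mps = container_of(slice, struct my_page_slice, mps_cl);
 */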
374
375 #define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)
376
377 #define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)                   \
378 ({                                                                      \
379         const struct lu_env        *__env  = (_env);                    \
380         struct cl_page             *__page = (_page);                   \
381         const struct cl_page_slice *__scan;                             \
382         int                         __result;                           \
383         ptrdiff_t                   __op   = (_op);                     \
384         int                       (*__method)_proto;                    \
385                                                                         \
386         __result = 0;                                                   \
387         list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
388                 __method = *(void **)((char *)__scan->cpl_ops + __op);  \
389                 if (__method) {                                         \
390                         __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
391                         if (__result != 0)                              \
392                                 break;                                  \
393                 }                                                       \
394         }                                                               \
395         if (__result > 0)                                               \
396                 __result = 0;                                           \
397         __result;                                                       \
398 })
399
400 #define CL_PAGE_INVOKE_REVERSE(_env, _page, _op, _proto, ...)           \
401 ({                                                                      \
402         const struct lu_env        *__env  = (_env);                    \
403         struct cl_page             *__page = (_page);                   \
404         const struct cl_page_slice *__scan;                             \
405         int                         __result;                           \
406         ptrdiff_t                   __op   = (_op);                     \
407         int                       (*__method)_proto;                    \
408                                                                         \
409         __result = 0;                                                   \
410         list_for_each_entry_reverse(__scan, &__page->cp_layers,         \
411                                         cpl_linkage) {                  \
412                 __method = *(void **)((char *)__scan->cpl_ops +  __op); \
413                 if (__method) {                                         \
414                         __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
415                         if (__result != 0)                              \
416                                 break;                                  \
417                 }                                                       \
418         }                                                               \
419         if (__result > 0)                                               \
420                 __result = 0;                                           \
421         __result;                                                       \
422 })
423
424 #define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)                   \
425 do {                                                                    \
426         const struct lu_env        *__env  = (_env);                    \
427         struct cl_page             *__page = (_page);                   \
428         const struct cl_page_slice *__scan;                             \
429         ptrdiff_t                   __op   = (_op);                     \
430         void                      (*__method)_proto;                    \
431                                                                         \
432         list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
433                 __method = *(void **)((char *)__scan->cpl_ops + __op);  \
434                 if (__method)                                           \
435                         (*__method)(__env, __scan, ## __VA_ARGS__);     \
436         }                                                               \
437 } while (0)
438
439 #define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)           \
440 do {                                                                    \
441         const struct lu_env        *__env  = (_env);                    \
442         struct cl_page             *__page = (_page);                   \
443         const struct cl_page_slice *__scan;                             \
444         ptrdiff_t                   __op   = (_op);                     \
445         void                      (*__method)_proto;                    \
446                                                                         \
447         list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
448                 __method = *(void **)((char *)__scan->cpl_ops + __op);  \
449                 if (__method)                                           \
450                         (*__method)(__env, __scan, ## __VA_ARGS__);     \
451         }                                                               \
452 } while (0)
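
/*
 * Dispatch sketch (illustrative): CL_PAGE_OP() turns a method name into a
 * byte offset within struct cl_page_operations, and the macros above walk
 * cl_page::cp_layers fetching that method from each slice's cpl_ops.  An
 * open-coded equivalent of
 * CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_discard), ..., io) is roughly:
 *
 *	const struct cl_page_slice *scan;
 *
 *	list_for_each_entry(scan, &pg->cp_layers, cpl_linkage) {
 *		if (scan->cpl_ops->cpo_discard)
 *			scan->cpl_ops->cpo_discard(env, scan, io);
 *	}
 */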
453
454 static int cl_page_invoke(const struct lu_env *env,
455                           struct cl_io *io, struct cl_page *page, ptrdiff_t op)
456
457 {
458         PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
459         return CL_PAGE_INVOKE(env, page, op,
460                               (const struct lu_env *,
461                                const struct cl_page_slice *, struct cl_io *),
462                               io);
463 }
464
465 static void cl_page_invoid(const struct lu_env *env,
466                            struct cl_io *io, struct cl_page *page, ptrdiff_t op)
467
468 {
469         PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
470         CL_PAGE_INVOID(env, page, op,
471                        (const struct lu_env *,
472                         const struct cl_page_slice *, struct cl_io *), io);
473 }
474
475 static void cl_page_owner_clear(struct cl_page *page)
476 {
477         if (page->cp_owner) {
478                 LASSERT(page->cp_owner->ci_owned_nr > 0);
479                 page->cp_owner->ci_owned_nr--;
480                 page->cp_owner = NULL;
481                 page->cp_task = NULL;
482         }
483 }
484
485 static void cl_page_owner_set(struct cl_page *page)
486 {
487         page->cp_owner->ci_owned_nr++;
488 }
489
490 void cl_page_disown0(const struct lu_env *env,
491                      struct cl_io *io, struct cl_page *pg)
492 {
493         enum cl_page_state state;
494
495         state = pg->cp_state;
496         PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
497         PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
498         cl_page_owner_clear(pg);
499
500         if (state == CPS_OWNED)
501                 cl_page_state_set(env, pg, CPS_CACHED);
502         /*
503          * Completion call-backs are executed in bottom-up order, so that the
504          * uppermost layer (llite), responsible for VFS/VM interaction, runs
505          * last and can release locks safely.
506          */
507         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
508                                (const struct lu_env *,
509                                 const struct cl_page_slice *, struct cl_io *),
510                                io);
511 }
512
513 /**
514  * Returns true iff the page is owned by the given io.
515  */
516 int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
517 {
518         struct cl_io *top = cl_io_top((struct cl_io *)io);
519         LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
520         return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
521 }
522 EXPORT_SYMBOL(cl_page_is_owned);
523
524 /**
525  * Try to own a page by IO.
526  *
527  * Waits until the page is in cl_page_state::CPS_CACHED state, and then
528  * switches it into cl_page_state::CPS_OWNED state.
529  *
530  * \pre  !cl_page_is_owned(pg, io)
531  * \post result == 0 iff cl_page_is_owned(pg, io)
532  *
533  * \retval 0   success
534  *
535  * \retval -ve failure, e.g., the page was destroyed (and landed in
536  *           cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
537  *           or the page was owned by another thread, or is in IO.
538  *
539  * \see cl_page_disown()
540  * \see cl_page_operations::cpo_own()
541  * \see cl_page_own_try()
542  * \see cl_page_own
543  */
544 static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
545                         struct cl_page *pg, int nonblock)
546 {
547         int result;
548
549         PINVRNT(env, pg, !cl_page_is_owned(pg, io));
550
551         io = cl_io_top(io);
552
553         if (pg->cp_state == CPS_FREEING) {
554                 result = -ENOENT;
555         } else {
556                 result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
557                                         (const struct lu_env *,
558                                          const struct cl_page_slice *,
559                                          struct cl_io *, int),
560                                         io, nonblock);
561                 if (result == 0) {
562                         PASSERT(env, pg, !pg->cp_owner);
563                         PASSERT(env, pg, !pg->cp_req);
564                         pg->cp_owner = cl_io_top(io);
565                         pg->cp_task  = current;
566                         cl_page_owner_set(pg);
567                         if (pg->cp_state != CPS_FREEING) {
568                                 cl_page_state_set(env, pg, CPS_OWNED);
569                         } else {
570                                 cl_page_disown0(env, io, pg);
571                                 result = -ENOENT;
572                         }
573                 }
574         }
575         PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
576         return result;
577 }
578
579 /**
580  * Own a page, might be blocked.
581  *
582  * \see cl_page_own0()
583  */
584 int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
585 {
586         return cl_page_own0(env, io, pg, 0);
587 }
588 EXPORT_SYMBOL(cl_page_own);
589
590 /**
591  * Nonblock version of cl_page_own().
592  *
593  * \see cl_page_own0()
594  */
595 int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
596                     struct cl_page *pg)
597 {
598         return cl_page_own0(env, io, pg, 1);
599 }
600 EXPORT_SYMBOL(cl_page_own_try);
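
/*
 * Ownership sketch (illustrative, caller-supplied env/io assumed): a typical
 * non-blocking attempt to own a cached page, giving up if another thread
 * owns it or the page is being freed.
 *
 *	if (cl_page_own_try(env, io, pg) == 0) {
 *		... the page is now CPS_OWNED and private to this io ...
 *		cl_page_disown(env, io, pg);
 *	} else {
 *		... skip the page, or retry with the blocking cl_page_own() ...
 *	}
 */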
601
602 /**
603  * Assume page ownership.
604  *
605  * Called when page is already locked by the hosting VM.
606  *
607  * \pre !cl_page_is_owned(pg, io)
608  * \post cl_page_is_owned(pg, io)
609  *
610  * \see cl_page_operations::cpo_assume()
611  */
612 void cl_page_assume(const struct lu_env *env,
613                     struct cl_io *io, struct cl_page *pg)
614 {
615         PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));
616
617         io = cl_io_top(io);
618
619         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
620         PASSERT(env, pg, !pg->cp_owner);
621         pg->cp_owner = cl_io_top(io);
622         pg->cp_task = current;
623         cl_page_owner_set(pg);
624         cl_page_state_set(env, pg, CPS_OWNED);
625 }
626 EXPORT_SYMBOL(cl_page_assume);
627
628 /**
629  * Releases page ownership without unlocking the page.
630  *
631  * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
632  * underlying VM page (as VM is supposed to do this itself).
633  *
634  * \pre   cl_page_is_owned(pg, io)
635  * \post !cl_page_is_owned(pg, io)
636  *
637  * \see cl_page_assume()
638  */
639 void cl_page_unassume(const struct lu_env *env,
640                       struct cl_io *io, struct cl_page *pg)
641 {
642         PINVRNT(env, pg, cl_page_is_owned(pg, io));
643         PINVRNT(env, pg, cl_page_invariant(pg));
644
645         io = cl_io_top(io);
646         cl_page_owner_clear(pg);
647         cl_page_state_set(env, pg, CPS_CACHED);
648         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
649                                (const struct lu_env *,
650                                 const struct cl_page_slice *, struct cl_io *),
651                                io);
652 }
653 EXPORT_SYMBOL(cl_page_unassume);
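
/*
 * Assume/unassume sketch (illustrative): when the VM has already locked the
 * page (e.g. in a writeback-style path), ownership is taken with
 * cl_page_assume() instead of cl_page_own(), and released with
 * cl_page_unassume() while the VM page stays locked.
 *
 *	lock_page(vmpage);
 *	cl_page_assume(env, io, pg);
 *	... operate on the owned page; pg is CPS_OWNED here ...
 *	cl_page_unassume(env, io, pg);
 *	unlock_page(vmpage);
 */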
654
655 /**
656  * Releases page ownership.
657  *
658  * Moves page into cl_page_state::CPS_CACHED.
659  *
660  * \pre   cl_page_is_owned(pg, io)
661  * \post !cl_page_is_owned(pg, io)
662  *
663  * \see cl_page_own()
664  * \see cl_page_operations::cpo_disown()
665  */
666 void cl_page_disown(const struct lu_env *env,
667                     struct cl_io *io, struct cl_page *pg)
668 {
669         PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
670                 pg->cp_state == CPS_FREEING);
671
672         io = cl_io_top(io);
673         cl_page_disown0(env, io, pg);
674 }
675 EXPORT_SYMBOL(cl_page_disown);
676
677 /**
678  * Called when page is to be removed from the object, e.g., as a result of
679  * truncate.
680  *
681  * Calls cl_page_operations::cpo_discard() top-to-bottom.
682  *
683  * \pre cl_page_is_owned(pg, io)
684  *
685  * \see cl_page_operations::cpo_discard()
686  */
687 void cl_page_discard(const struct lu_env *env,
688                      struct cl_io *io, struct cl_page *pg)
689 {
690         PINVRNT(env, pg, cl_page_is_owned(pg, io));
691         PINVRNT(env, pg, cl_page_invariant(pg));
692
693         cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
694 }
695 EXPORT_SYMBOL(cl_page_discard);
696
697 /**
698  * Version of cl_page_delete() that can be called for pages that are not fully
699  * constructed, e.g., in the error handling path
700  * cl_page_find()->cl_page_delete0(). Doesn't check the page invariant.
701  */
702 static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
703 {
704         PASSERT(env, pg, pg->cp_state != CPS_FREEING);
705
706         /*
707          * Sever all ways to obtain new pointers to @pg.
708          */
709         cl_page_owner_clear(pg);
710
711         cl_page_state_set0(env, pg, CPS_FREEING);
712
713         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
714                                (const struct lu_env *,
715                                 const struct cl_page_slice *));
716 }
717
718 /**
719  * Called when a decision is made to throw page out of memory.
720  *
721  * Notifies all layers about page destruction by calling
722  * cl_page_operations::cpo_delete() method top-to-bottom.
723  *
724  * Moves page into cl_page_state::CPS_FREEING state (this is the only place
725  * where transition to this state happens).
726  *
727  * Eliminates all venues through which new references to the page can be
728  * obtained:
729  *
730  *     - removes page from the radix trees,
731  *
732  *     - breaks linkage from VM page to cl_page.
733  *
734  * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
735  * drain after some time, at which point page will be recycled.
736  *
737  * \pre  VM page is locked
738  * \post pg->cp_state == CPS_FREEING
739  *
740  * \see cl_page_operations::cpo_delete()
741  */
742 void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
743 {
744         PINVRNT(env, pg, cl_page_invariant(pg));
745         cl_page_delete0(env, pg);
746 }
747 EXPORT_SYMBOL(cl_page_delete);
748
749 /**
750  * Marks page up-to-date.
751  *
752  * Calls cl_page_operations::cpo_export() through all layers top-to-bottom. The
753  * layer responsible for VM interaction has to mark or clear the page as
754  * up-to-date according to the \a uptodate argument.
755  *
756  * \see cl_page_operations::cpo_export()
757  */
758 void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
759 {
760         PINVRNT(env, pg, cl_page_invariant(pg));
761         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
762                        (const struct lu_env *,
763                         const struct cl_page_slice *, int), uptodate);
764 }
765 EXPORT_SYMBOL(cl_page_export);
766
767 /**
768  * Returns true iff \a pg is VM-locked in a suitable sense by the calling
769  * thread.
770  */
771 int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
772 {
773         int result;
774         const struct cl_page_slice *slice;
775
776         slice = container_of(pg->cp_layers.next,
777                              const struct cl_page_slice, cpl_linkage);
778         PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
779         /*
780          * Call ->cpo_is_vmlocked() directly instead of going through
781          * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
782          * cl_page_invariant().
783          */
784         result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
785         PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
786         return result == -EBUSY;
787 }
788 EXPORT_SYMBOL(cl_page_is_vmlocked);
789
790 static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
791 {
792         return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
793 }
794
795 static void cl_page_io_start(const struct lu_env *env,
796                              struct cl_page *pg, enum cl_req_type crt)
797 {
798         /*
799          * Page is queued for IO, change its state.
800          */
801         cl_page_owner_clear(pg);
802         cl_page_state_set(env, pg, cl_req_type_state(crt));
803 }
804
805 /**
806  * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
807  * called top-to-bottom. Every layer either agrees to submit this page (by
808  * returning 0), or requests to omit this page (by returning -EALREADY). The
809  * layer handling interactions with the VM also has to inform the VM that the
810  * page is now under transfer.
811  */
812 int cl_page_prep(const struct lu_env *env, struct cl_io *io,
813                  struct cl_page *pg, enum cl_req_type crt)
814 {
815         int result;
816
817         PINVRNT(env, pg, cl_page_is_owned(pg, io));
818         PINVRNT(env, pg, cl_page_invariant(pg));
819         PINVRNT(env, pg, crt < CRT_NR);
820
821         /*
822          * XXX this has to be called bottom-to-top, so that llite can set up
823          * PG_writeback without risking other layers deciding to skip this
824          * page.
825          */
826         if (crt >= CRT_NR)
827                 return -EINVAL;
828         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
829         if (result == 0)
830                 cl_page_io_start(env, pg, crt);
831
832         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
833         return result;
834 }
835 EXPORT_SYMBOL(cl_page_prep);
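
/*
 * Transfer-start sketch (illustrative): an owned page is handed to the
 * transfer machinery with cl_page_prep().  On -EALREADY some layer decided
 * the page does not need to be sent; the caller then still owns the page and
 * can, for example, release ownership with cl_page_disown().
 *
 *	rc = cl_page_prep(env, io, pg, CRT_WRITE);
 *	if (rc == 0) {
 *		... page is now CPS_PAGEOUT, owned by the transfer ...
 *	} else if (rc == -EALREADY) {
 *		cl_page_disown(env, io, pg);
 *	}
 */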
836
837 /**
838  * Notify layers about transfer completion.
839  *
840  * Invoked by the transfer sub-system (which is a part of osc) to notify layers
841  * that a transfer of which this page is a part has completed.
842  *
843  * Completion call-backs are executed in bottom-up order, so that the
844  * uppermost layer (llite), responsible for the VFS/VM interaction, runs last
845  * and can release locks safely.
846  *
847  * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
848  * \post pg->cp_state == CPS_CACHED
849  *
850  * \see cl_page_operations::cpo_completion()
851  */
852 void cl_page_completion(const struct lu_env *env,
853                         struct cl_page *pg, enum cl_req_type crt, int ioret)
854 {
855         struct cl_sync_io *anchor = pg->cp_sync_io;
856
857         PASSERT(env, pg, crt < CRT_NR);
858         /* cl_page::cp_req already cleared by the caller (osc_completion()) */
859         PASSERT(env, pg, !pg->cp_req);
860         PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));
861
862         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);
863         if (crt == CRT_READ && ioret == 0) {
864                 PASSERT(env, pg, !(pg->cp_flags & CPF_READ_COMPLETED));
865                 pg->cp_flags |= CPF_READ_COMPLETED;
866         }
867
868         cl_page_state_set(env, pg, CPS_CACHED);
869         if (crt >= CRT_NR)
870                 return;
871         CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
872                                (const struct lu_env *,
873                                 const struct cl_page_slice *, int), ioret);
874         if (anchor) {
875                 LASSERT(cl_page_is_vmlocked(env, pg));
876                 LASSERT(pg->cp_sync_io == anchor);
877                 pg->cp_sync_io = NULL;
878         }
879         /*
880          * As page->cp_obj is pinned by a reference from page->cp_req, it is
881          * safe to call cl_page_put() without risking object destruction in a
882          * non-blocking context.
883          */
884         cl_page_put(env, pg);
885
886         if (anchor)
887                 cl_sync_io_note(env, anchor, ioret);
888 }
889 EXPORT_SYMBOL(cl_page_completion);
890
891 /**
892  * Notify layers that the transfer formation engine has decided to yank this
893  * page from the cache and make it part of a transfer.
894  *
895  * \pre  pg->cp_state == CPS_CACHED
896  * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
897  *
898  * \see cl_page_operations::cpo_make_ready()
899  */
900 int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
901                        enum cl_req_type crt)
902 {
903         int result;
904
905         PINVRNT(env, pg, crt < CRT_NR);
906
907         if (crt >= CRT_NR)
908                 return -EINVAL;
909         result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
910                                 (const struct lu_env *,
911                                  const struct cl_page_slice *));
912         if (result == 0) {
913                 PASSERT(env, pg, pg->cp_state == CPS_CACHED);
914                 cl_page_io_start(env, pg, crt);
915         }
916         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
917         return result;
918 }
919 EXPORT_SYMBOL(cl_page_make_ready);
920
921 /**
922  * Called when a page is being written back at the kernel's initiative.
923  *
924  * \pre  cl_page_is_owned(pg, io)
925  * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
926  *
927  * \see cl_page_operations::cpo_flush()
928  */
929 int cl_page_flush(const struct lu_env *env, struct cl_io *io,
930                   struct cl_page *pg)
931 {
932         int result;
933
934         PINVRNT(env, pg, cl_page_is_owned(pg, io));
935         PINVRNT(env, pg, cl_page_invariant(pg));
936
937         result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));
938
939         CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
940         return result;
941 }
942 EXPORT_SYMBOL(cl_page_flush);
943
944 /**
945  * Checks whether the page is protected by any extent lock in at least the
946  * required mode.
947  *
948  * \return the same as in cl_page_operations::cpo_is_under_lock() method.
949  * \see cl_page_operations::cpo_is_under_lock()
950  */
951 int cl_page_is_under_lock(const struct lu_env *env, struct cl_io *io,
952                           struct cl_page *page, pgoff_t *max_index)
953 {
954         int rc;
955
956         PINVRNT(env, page, cl_page_invariant(page));
957
958         rc = CL_PAGE_INVOKE_REVERSE(env, page, CL_PAGE_OP(cpo_is_under_lock),
959                                     (const struct lu_env *,
960                                      const struct cl_page_slice *,
961                                       struct cl_io *, pgoff_t *),
962                                     io, max_index);
963         return rc;
964 }
965 EXPORT_SYMBOL(cl_page_is_under_lock);
966
967 /**
968  * Tells transfer engine that only part of a page is to be transmitted.
969  *
970  * \see cl_page_operations::cpo_clip()
971  */
972 void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
973                   int from, int to)
974 {
975         PINVRNT(env, pg, cl_page_invariant(pg));
976
977         CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
978         CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
979                        (const struct lu_env *,
980                         const struct cl_page_slice *, int, int),
981                        from, to);
982 }
983 EXPORT_SYMBOL(cl_page_clip);
984
985 /**
986  * Prints a human-readable representation of the \a pg header via \a printer.
987  */
988 void cl_page_header_print(const struct lu_env *env, void *cookie,
989                           lu_printer_t printer, const struct cl_page *pg)
990 {
991         (*printer)(env, cookie,
992                    "page@%p[%d %p %d %d %d %p %p %#x]\n",
993                    pg, atomic_read(&pg->cp_ref), pg->cp_obj,
994                    pg->cp_state, pg->cp_error, pg->cp_type,
995                    pg->cp_owner, pg->cp_req, pg->cp_flags);
996 }
997 EXPORT_SYMBOL(cl_page_header_print);
998
999 /**
1000  * Prints a human-readable representation of \a pg via \a printer.
1001  */
1002 void cl_page_print(const struct lu_env *env, void *cookie,
1003                    lu_printer_t printer, const struct cl_page *pg)
1004 {
1005         cl_page_header_print(env, cookie, printer, pg);
1006         CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
1007                        (const struct lu_env *env,
1008                         const struct cl_page_slice *slice,
1009                         void *cookie, lu_printer_t p), cookie, printer);
1010         (*printer)(env, cookie, "end page@%p\n", pg);
1011 }
1012 EXPORT_SYMBOL(cl_page_print);
1013
1014 /**
1015  * Cancel a page which is still in a transfer.
1016  */
1017 int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
1018 {
1019         return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
1020                               (const struct lu_env *,
1021                                const struct cl_page_slice *));
1022 }
1023 EXPORT_SYMBOL(cl_page_cancel);
1024
1025 /**
1026  * Converts a page index within object \a obj into a byte offset.
1027  */
1028 loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
1029 {
1030         /*
1031          * XXX for now.
1032          */
1033         return (loff_t)idx << PAGE_SHIFT;
1034 }
1035 EXPORT_SYMBOL(cl_offset);
1036
1037 /**
1038  * Converts a byte offset within object \a obj into a page index.
1039  */
1040 pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
1041 {
1042         /*
1043          * XXX for now.
1044          */
1045         return offset >> PAGE_SHIFT;
1046 }
1047 EXPORT_SYMBOL(cl_index);
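
/*
 * Arithmetic sketch (illustrative, assuming a 4K page size, PAGE_SHIFT == 12):
 * byte offset 0x3000 maps to page index 3 and back:
 *
 *	pgoff_t idx = cl_index(obj, 0x3000);	(0x3000 >> 12 == 3)
 *	loff_t  off = cl_offset(obj, idx);	(3 << 12 == 0x3000)
 */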
1048
1049 int cl_page_size(const struct cl_object *obj)
1050 {
1051         return 1 << PAGE_SHIFT;
1052 }
1053 EXPORT_SYMBOL(cl_page_size);
1054
1055 /**
1056  * Adds page slice to the compound page.
1057  *
1058  * This is called by cl_object_operations::coo_page_init() methods to add
1059  * per-layer state to the page. The new state is added at the end of the
1060  * cl_page::cp_layers list, that is, at the bottom of the stack.
1061  *
1062  * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
1063  */
1064 void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
1065                        struct cl_object *obj, pgoff_t index,
1066                        const struct cl_page_operations *ops)
1067 {
1068         list_add_tail(&slice->cpl_linkage, &page->cp_layers);
1069         slice->cpl_obj  = obj;
1070         slice->cpl_index = index;
1071         slice->cpl_ops  = ops;
1072         slice->cpl_page = page;
1073 }
1074 EXPORT_SYMBOL(cl_page_slice_add);
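
/*
 * Layer-initialization sketch (illustrative): a layer's
 * cl_object_operations::coo_page_init() method typically embeds a
 * cl_page_slice in its own per-page structure and registers it here.
 * "my_page", "my_page_ops" and the cl_object_page_slice() helper used to
 * reach the layer's reserved area within the page buffer are assumptions.
 *
 *	static int my_page_init(const struct lu_env *env, struct cl_object *obj,
 *				struct cl_page *page, pgoff_t index)
 *	{
 *		struct my_page *mp = cl_object_page_slice(obj, page);
 *
 *		cl_page_slice_add(page, &mp->mp_cl, obj, index, &my_page_ops);
 *		return 0;
 *	}
 */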
1075
1076 /**
1077  * Allocate and initialize cl_cache, called by ll_init_sbi().
1078  */
1079 struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
1080 {
1081         struct cl_client_cache  *cache = NULL;
1082
1083         cache = kzalloc(sizeof(*cache), GFP_KERNEL);
1084         if (!cache)
1085                 return NULL;
1086
1087         /* Initialize cache data */
1088         atomic_set(&cache->ccc_users, 1);
1089         cache->ccc_lru_max = lru_page_max;
1090         atomic_set(&cache->ccc_lru_left, lru_page_max);
1091         spin_lock_init(&cache->ccc_lru_lock);
1092         INIT_LIST_HEAD(&cache->ccc_lru);
1093
1094         atomic_set(&cache->ccc_unstable_nr, 0);
1095         init_waitqueue_head(&cache->ccc_unstable_waitq);
1096
1097         return cache;
1098 }
1099 EXPORT_SYMBOL(cl_cache_init);
1100
1101 /**
1102  * Increase cl_cache refcount
1103  */
1104 void cl_cache_incref(struct cl_client_cache *cache)
1105 {
1106         atomic_inc(&cache->ccc_users);
1107 }
1108 EXPORT_SYMBOL(cl_cache_incref);
1109
1110 /**
1111  * Decrease the cl_cache refcount and free the cache once the refcount drops
1112  * to 0. Since llite, lov and osc each hold a cl_cache refcount,
1113  * the free does not race. (LU-6173)
1114  */
1115 void cl_cache_decref(struct cl_client_cache *cache)
1116 {
1117         if (atomic_dec_and_test(&cache->ccc_users))
1118                 kfree(cache);
1119 }
1120 EXPORT_SYMBOL(cl_cache_decref);
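
/*
 * Cache lifecycle sketch (illustrative): the client cache is created once per
 * mount with an LRU budget, shared by taking extra references, and freed when
 * the last user drops its reference.
 *
 *	struct cl_client_cache *cache;
 *
 *	cache = cl_cache_init(lru_page_max);	(refcount == 1)
 *	if (!cache)
 *		return -ENOMEM;
 *	cl_cache_incref(cache);			(e.g. hand a reference to lov/osc)
 *	...
 *	cl_cache_decref(cache);
 *	cl_cache_decref(cache);			(last put frees the cache)
 */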