/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Implementation of cl_page for VVP layer.
 *
 *   Author: Nikita Danilov <nikita.danilov@sun.com>
 *   Author: Jinshan Xiong <jinshan.xiong@whamcloud.com>
 */

#define DEBUG_SUBSYSTEM S_LLITE

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/page-flags.h>
#include <linux/pagemap.h>

#include "../include/lustre_lite.h"

#include "llite_internal.h"
#include "vvp_internal.h"

/*****************************************************************************
 *
 * Page operations.
 *
 */

static void vvp_page_fini_common(struct vvp_page *vpg)
{
        struct page *vmpage = vpg->vpg_page;

        LASSERT(vmpage);
        put_page(vmpage);
}

static void vvp_page_fini(const struct lu_env *env,
                          struct cl_page_slice *slice)
{
        struct vvp_page *vpg    = cl2vvp_page(slice);
        struct page     *vmpage = vpg->vpg_page;

        /*
         * vmpage->private was already cleared when page was moved into
         * VPG_FREEING state.
         */
        LASSERT((struct cl_page *)vmpage->private != slice->cpl_page);
        vvp_page_fini_common(vpg);
}

static int vvp_page_own(const struct lu_env *env,
                        const struct cl_page_slice *slice, struct cl_io *io,
                        int nonblock)
{
        struct vvp_page *vpg    = cl2vvp_page(slice);
        struct page     *vmpage = vpg->vpg_page;

        LASSERT(vmpage);
        if (nonblock) {
                if (!trylock_page(vmpage))
                        return -EAGAIN;

                if (unlikely(PageWriteback(vmpage))) {
                        unlock_page(vmpage);
                        return -EAGAIN;
                }

                return 0;
        }

        lock_page(vmpage);
        wait_on_page_writeback(vmpage);

        return 0;
}

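/*
 * Example (illustration only, not part of the original file): a minimal
 * sketch of how the two ownership modes above combine.  A caller that
 * prefers not to sleep can try the non-blocking path first and fall back
 * to the blocking one; vvp_example_own_page() is a hypothetical helper
 * introduced purely for this sketch.
 */
static int __maybe_unused vvp_example_own_page(const struct lu_env *env,
                                               const struct cl_page_slice *slice,
                                               struct cl_io *io)
{
        int rc;

        /* Non-blocking attempt: -EAGAIN means the page is locked or busy. */
        rc = vvp_page_own(env, slice, io, 1);
        if (rc == -EAGAIN)
                /* Blocking attempt: sleeps on the page lock and writeback. */
                rc = vvp_page_own(env, slice, io, 0);
        return rc;
}
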
static void vvp_page_assume(const struct lu_env *env,
                            const struct cl_page_slice *slice,
                            struct cl_io *unused)
{
        struct page *vmpage = cl2vm_page(slice);

        LASSERT(vmpage);
        LASSERT(PageLocked(vmpage));
        wait_on_page_writeback(vmpage);
}

static void vvp_page_unassume(const struct lu_env *env,
                              const struct cl_page_slice *slice,
                              struct cl_io *unused)
{
        struct page *vmpage = cl2vm_page(slice);

        LASSERT(vmpage);
        LASSERT(PageLocked(vmpage));
}

static void vvp_page_disown(const struct lu_env *env,
                            const struct cl_page_slice *slice, struct cl_io *io)
{
        struct page *vmpage = cl2vm_page(slice);

        LASSERT(vmpage);
        LASSERT(PageLocked(vmpage));

        unlock_page(vmpage);
}

static void vvp_page_discard(const struct lu_env *env,
                             const struct cl_page_slice *slice,
                             struct cl_io *unused)
{
        struct page     *vmpage = cl2vm_page(slice);
        struct vvp_page *vpg    = cl2vvp_page(slice);

        LASSERT(vmpage);
        LASSERT(PageLocked(vmpage));

        if (vpg->vpg_defer_uptodate && !vpg->vpg_ra_used)
                ll_ra_stats_inc(vmpage->mapping->host, RA_STAT_DISCARDED);

        ll_invalidate_page(vmpage);
}

static void vvp_page_delete(const struct lu_env *env,
                            const struct cl_page_slice *slice)
{
        struct page      *vmpage = cl2vm_page(slice);
        struct inode     *inode  = vmpage->mapping->host;
        struct cl_object *obj    = slice->cpl_obj;
        struct cl_page   *page   = slice->cpl_page;
        int refc;

        LASSERT(PageLocked(vmpage));
        LASSERT((struct cl_page *)vmpage->private == page);
        LASSERT(inode == vvp_object_inode(obj));

        vvp_write_complete(cl2vvp(obj), cl2vvp_page(slice));

        /* Drop the reference count held in vvp_page_init */
        refc = atomic_dec_return(&page->cp_ref);
        LASSERTF(refc >= 1, "page = %p, refc = %d\n", page, refc);

        ClearPageUptodate(vmpage);
        ClearPagePrivate(vmpage);
        vmpage->private = 0;
        /*
         * Reference from vmpage to cl_page is removed, but the reference back
         * is still here. It is removed later in vvp_page_fini().
         */
}

static void vvp_page_export(const struct lu_env *env,
                            const struct cl_page_slice *slice,
                            int uptodate)
{
        struct page *vmpage = cl2vm_page(slice);

        LASSERT(vmpage);
        LASSERT(PageLocked(vmpage));
        if (uptodate)
                SetPageUptodate(vmpage);
        else
                ClearPageUptodate(vmpage);
}

static int vvp_page_is_vmlocked(const struct lu_env *env,
                                const struct cl_page_slice *slice)
{
        return PageLocked(cl2vm_page(slice)) ? -EBUSY : -ENODATA;
}

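/*
 * Example (illustration only, not part of the original file): the
 * ->cpo_is_vmlocked() return convention above is easy to misread, so
 * vvp_example_vmpage_locked(), a hypothetical helper, spells out how the
 * result would be interpreted.
 */
static int __maybe_unused vvp_example_vmpage_locked(const struct lu_env *env,
                                                    const struct cl_page_slice *slice)
{
        /* -EBUSY means the VM page is locked, -ENODATA that it is not. */
        return vvp_page_is_vmlocked(env, slice) == -EBUSY;
}
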
static int vvp_page_prep_read(const struct lu_env *env,
                              const struct cl_page_slice *slice,
                              struct cl_io *unused)
{
        /* Skip the page if it is already marked as PG_uptodate. */
        return PageUptodate(cl2vm_page(slice)) ? -EALREADY : 0;
}

static int vvp_page_prep_write(const struct lu_env *env,
                               const struct cl_page_slice *slice,
                               struct cl_io *unused)
{
        struct page *vmpage = cl2vm_page(slice);
        struct cl_page *pg = slice->cpl_page;

        LASSERT(PageLocked(vmpage));
        LASSERT(!PageDirty(vmpage));

        /*
         * The ll_writepage path is not a sync write, so we need to set the
         * page writeback flag.
         */
        if (!pg->cp_sync_io)
                set_page_writeback(vmpage);

        vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));

        return 0;
}

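/*
 * Example (illustration only, not part of the original file): the method
 * above only marks the VM page as under writeback for the asynchronous
 * (ll_writepage) path; synchronous writes signal completion through
 * cp_sync_io instead.  vvp_example_write_is_async() is a hypothetical
 * helper that spells out that distinction.
 */
static bool __maybe_unused vvp_example_write_is_async(const struct cl_page *pg)
{
        /* No cp_sync_io anchor means the write completes asynchronously. */
        return !pg->cp_sync_io;
}
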
/**
 * Handles page transfer errors at the VM level.
 *
 * This takes the inode as a separate argument because the inode on which the
 * error is to be set can be different from the \a vmpage inode in the case of
 * direct I/O.
 */
static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret)
{
        struct vvp_object *obj = cl_inode2vvp(inode);

        if (ioret == 0) {
                ClearPageError(vmpage);
                obj->vob_discard_page_warned = 0;
        } else {
                SetPageError(vmpage);
                if (ioret == -ENOSPC)
                        set_bit(AS_ENOSPC, &inode->i_mapping->flags);
                else
                        set_bit(AS_EIO, &inode->i_mapping->flags);

                if ((ioret == -ESHUTDOWN || ioret == -EINTR) &&
                    obj->vob_discard_page_warned == 0) {
                        obj->vob_discard_page_warned = 1;
                        ll_dirty_page_discard_warn(vmpage, ioret);
                }
        }
}

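/*
 * Example (illustration only, not part of the original file): the AS_EIO
 * and AS_ENOSPC bits set above live in the mapping flags, the same place
 * the generic VFS write-and-wait paths look when reporting errors back to
 * user space.  vvp_example_mapping_error() is a hypothetical helper
 * sketching how such a recorded error could later be observed and cleared.
 */
static int __maybe_unused vvp_example_mapping_error(struct address_space *mapping)
{
        if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
                return -ENOSPC;
        if (test_and_clear_bit(AS_EIO, &mapping->flags))
                return -EIO;
        return 0;
}
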
static void vvp_page_completion_read(const struct lu_env *env,
                                     const struct cl_page_slice *slice,
                                     int ioret)
{
        struct vvp_page *vpg    = cl2vvp_page(slice);
        struct page     *vmpage = vpg->vpg_page;
        struct cl_page  *page   = slice->cpl_page;
        struct inode    *inode  = vvp_object_inode(page->cp_obj);

        LASSERT(PageLocked(vmpage));
        CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret);

        if (vpg->vpg_defer_uptodate)
                ll_ra_count_put(ll_i2sbi(inode), 1);

        if (ioret == 0) {
                if (!vpg->vpg_defer_uptodate)
                        cl_page_export(env, page, 1);
        } else {
                vpg->vpg_defer_uptodate = 0;
        }

        if (!page->cp_sync_io)
                unlock_page(vmpage);
}

static void vvp_page_completion_write(const struct lu_env *env,
                                      const struct cl_page_slice *slice,
                                      int ioret)
{
        struct vvp_page *vpg    = cl2vvp_page(slice);
        struct cl_page  *pg     = slice->cpl_page;
        struct page     *vmpage = vpg->vpg_page;

        CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret);

        /*
         * TODO: If we only hit a recoverable error such as -ENOMEM, it would
         * actually make sense to add the page back to the oap pending list,
         * so that it does not have to be taken off the SoM write pending
         * list.  To implement this, we just need to return a non-zero value
         * from the ->cpo_completion method; the underlying transfer would be
         * notified and would then re-add the page to the pending transfer
         * queue.  -jay
         */

        vpg->vpg_write_queued = 0;
        vvp_write_complete(cl2vvp(slice->cpl_obj), vpg);

        if (pg->cp_sync_io) {
                LASSERT(PageLocked(vmpage));
                LASSERT(!PageWriteback(vmpage));
        } else {
                LASSERT(PageWriteback(vmpage));
                /*
                 * Only mark the page as in error when it is an async write,
                 * because applications won't wait for the IO to finish.
                 */
                vvp_vmpage_error(vvp_object_inode(pg->cp_obj), vmpage, ioret);

                end_page_writeback(vmpage);
        }
}

/**
 * Implements the cl_page_operations::cpo_make_ready() method.
 *
 * This is called to yank a page from the transfer cache and to send it out as
 * part of a transfer. The page is locked and its dirty bit is cleared; if the
 * page turns out to be owned by some concurrent IO, it is skipped (this is
 * unfortunate, but hopefully a rare situation, as it usually results in the
 * transfer being shorter than it could be).
 *
 * \retval 0         success, the page can be placed into the transfer
 *
 * \retval -EALREADY the page is either used by a concurrent IO or has been
 *                   truncated. Skip it.
 */
static int vvp_page_make_ready(const struct lu_env *env,
                               const struct cl_page_slice *slice)
{
        struct page *vmpage = cl2vm_page(slice);
        struct cl_page *pg = slice->cpl_page;
        int result = 0;

        lock_page(vmpage);
        if (clear_page_dirty_for_io(vmpage)) {
                LASSERT(pg->cp_state == CPS_CACHED);
                /* This actually clears the dirty bit in the radix tree. */
                set_page_writeback(vmpage);
                vvp_write_pending(cl2vvp(slice->cpl_obj), cl2vvp_page(slice));
                CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n");
        } else if (pg->cp_state == CPS_PAGEOUT) {
                /*
                 * Is it possible that osc_flush_async_page() has already
                 * made it ready?
                 */
                result = -EALREADY;
        } else {
                CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpected page state %d.\n",
                              pg->cp_state);
                LBUG();
        }
        unlock_page(vmpage);
        return result;
}

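/*
 * Example (illustration only, not part of the original file): a caller of
 * the method above would typically treat -EALREADY as "leave this page
 * alone" rather than as a hard failure; vvp_example_flush_one() is a
 * hypothetical wrapper showing that convention.
 */
static int __maybe_unused vvp_example_flush_one(const struct lu_env *env,
                                                const struct cl_page_slice *slice)
{
        int rc = vvp_page_make_ready(env, slice);

        /* The page is already being written out; that is not an error. */
        return rc == -EALREADY ? 0 : rc;
}
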
static int vvp_page_is_under_lock(const struct lu_env *env,
                                  const struct cl_page_slice *slice,
                                  struct cl_io *io, pgoff_t *max_index)
{
        if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE ||
            io->ci_type == CIT_FAULT) {
                struct vvp_io *vio = vvp_env_io(env);

                if (unlikely(vio->vui_fd->fd_flags & LL_FILE_GROUP_LOCKED))
                        *max_index = CL_PAGE_EOF;
        }
        return 0;
}

static int vvp_page_print(const struct lu_env *env,
                          const struct cl_page_slice *slice,
                          void *cookie, lu_printer_t printer)
{
        struct vvp_page *vpg    = cl2vvp_page(slice);
        struct page     *vmpage = vpg->vpg_page;

        (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ",
                   vpg, vpg->vpg_defer_uptodate, vpg->vpg_ra_used,
                   vpg->vpg_write_queued, vmpage);
        if (vmpage) {
                (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru",
                           (long)vmpage->flags, page_count(vmpage),
                           page_mapcount(vmpage), vmpage->private,
                           vmpage->index,
                           list_empty(&vmpage->lru) ? "not-" : "");
        }

        (*printer)(env, cookie, "\n");

        return 0;
}

static int vvp_page_fail(const struct lu_env *env,
                         const struct cl_page_slice *slice)
{
        /*
         * Cached read?
         */
        LBUG();

        return 0;
}

static const struct cl_page_operations vvp_page_ops = {
        .cpo_own           = vvp_page_own,
        .cpo_assume        = vvp_page_assume,
        .cpo_unassume      = vvp_page_unassume,
        .cpo_disown        = vvp_page_disown,
        .cpo_discard       = vvp_page_discard,
        .cpo_delete        = vvp_page_delete,
        .cpo_export        = vvp_page_export,
        .cpo_is_vmlocked   = vvp_page_is_vmlocked,
        .cpo_fini          = vvp_page_fini,
        .cpo_print         = vvp_page_print,
        .cpo_is_under_lock = vvp_page_is_under_lock,
        .io = {
                [CRT_READ] = {
                        .cpo_prep        = vvp_page_prep_read,
                        .cpo_completion  = vvp_page_completion_read,
                        .cpo_make_ready  = vvp_page_fail,
                },
                [CRT_WRITE] = {
                        .cpo_prep        = vvp_page_prep_write,
                        .cpo_completion  = vvp_page_completion_write,
                        .cpo_make_ready  = vvp_page_make_ready,
                },
        },
};

static int vvp_transient_page_prep(const struct lu_env *env,
                                   const struct cl_page_slice *slice,
                                   struct cl_io *unused)
{
        /* transient page should always be sent. */
        return 0;
}

static void vvp_transient_page_verify(const struct cl_page *page)
{
        struct inode *inode = vvp_object_inode(page->cp_obj);

        LASSERT(!inode_trylock(inode));
}

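/*
 * Example (illustration only, not part of the original file): transient
 * pages are only created and used while the inode mutex is held (the
 * direct I/O path), which is exactly what vvp_transient_page_verify()
 * asserts.  vvp_example_transient_ctx() is a hypothetical sketch of the
 * locking context a caller would provide.
 */
static void __maybe_unused vvp_example_transient_ctx(struct inode *inode)
{
        inode_lock(inode);
        /* Transient cl_pages of this inode may be created and used here. */
        inode_unlock(inode);
}
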
static int vvp_transient_page_own(const struct lu_env *env,
                                  const struct cl_page_slice *slice,
                                  struct cl_io *unused, int nonblock)
{
        vvp_transient_page_verify(slice->cpl_page);
        return 0;
}

static void vvp_transient_page_assume(const struct lu_env *env,
                                      const struct cl_page_slice *slice,
                                      struct cl_io *unused)
{
        vvp_transient_page_verify(slice->cpl_page);
}

static void vvp_transient_page_unassume(const struct lu_env *env,
                                        const struct cl_page_slice *slice,
                                        struct cl_io *unused)
{
        vvp_transient_page_verify(slice->cpl_page);
}

static void vvp_transient_page_disown(const struct lu_env *env,
                                      const struct cl_page_slice *slice,
                                      struct cl_io *unused)
{
        vvp_transient_page_verify(slice->cpl_page);
}

static void vvp_transient_page_discard(const struct lu_env *env,
                                       const struct cl_page_slice *slice,
                                       struct cl_io *unused)
{
        struct cl_page *page = slice->cpl_page;

        vvp_transient_page_verify(slice->cpl_page);

        /*
         * For transient pages, remove the page from the radix tree.
         */
        cl_page_delete(env, page);
}

static int vvp_transient_page_is_vmlocked(const struct lu_env *env,
                                          const struct cl_page_slice *slice)
{
        struct inode *inode = vvp_object_inode(slice->cpl_obj);
        int locked;

        locked = !inode_trylock(inode);
        if (!locked)
                inode_unlock(inode);
        return locked ? -EBUSY : -ENODATA;
}

static void
vvp_transient_page_completion(const struct lu_env *env,
                              const struct cl_page_slice *slice,
                              int ioret)
{
        vvp_transient_page_verify(slice->cpl_page);
}

static void vvp_transient_page_fini(const struct lu_env *env,
                                    struct cl_page_slice *slice)
{
        struct vvp_page *vpg = cl2vvp_page(slice);
        struct cl_page *clp = slice->cpl_page;
        struct vvp_object *clobj = cl2vvp(clp->cp_obj);

        vvp_page_fini_common(vpg);
        LASSERT(!inode_trylock(clobj->vob_inode));
        clobj->vob_transient_pages--;
}

static const struct cl_page_operations vvp_transient_page_ops = {
        .cpo_own           = vvp_transient_page_own,
        .cpo_assume        = vvp_transient_page_assume,
        .cpo_unassume      = vvp_transient_page_unassume,
        .cpo_disown        = vvp_transient_page_disown,
        .cpo_discard       = vvp_transient_page_discard,
        .cpo_fini          = vvp_transient_page_fini,
        .cpo_is_vmlocked   = vvp_transient_page_is_vmlocked,
        .cpo_print         = vvp_page_print,
        .cpo_is_under_lock = vvp_page_is_under_lock,
        .io = {
                [CRT_READ] = {
                        .cpo_prep        = vvp_transient_page_prep,
                        .cpo_completion  = vvp_transient_page_completion,
                },
                [CRT_WRITE] = {
                        .cpo_prep        = vvp_transient_page_prep,
                        .cpo_completion  = vvp_transient_page_completion,
                }
        }
};

int vvp_page_init(const struct lu_env *env, struct cl_object *obj,
                  struct cl_page *page, pgoff_t index)
{
        struct vvp_page *vpg    = cl_object_page_slice(obj, page);
        struct page     *vmpage = page->cp_vmpage;

        CLOBINVRNT(env, obj, vvp_object_invariant(obj));

        vpg->vpg_page = vmpage;
        get_page(vmpage);

        INIT_LIST_HEAD(&vpg->vpg_pending_linkage);
        if (page->cp_type == CPT_CACHEABLE) {
                /* in cache, decref in vvp_page_delete */
                atomic_inc(&page->cp_ref);
                SetPagePrivate(vmpage);
                vmpage->private = (unsigned long)page;
                cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
                                  &vvp_page_ops);
        } else {
                struct vvp_object *clobj = cl2vvp(obj);

                LASSERT(!inode_trylock(clobj->vob_inode));
                cl_page_slice_add(page, &vpg->vpg_cl, obj, index,
                                  &vvp_transient_page_ops);
                clobj->vob_transient_pages++;
        }
        return 0;
}
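
/*
 * Example (illustration only, not part of the original file): for
 * CPT_CACHEABLE pages, vvp_page_init() above stores the cl_page pointer in
 * vmpage->private and sets PG_private, so the owning cl_page can be
 * recovered from a VM page; vvp_example_vmpage2cl_page() is a hypothetical
 * helper showing that relationship.
 */
static __maybe_unused struct cl_page *vvp_example_vmpage2cl_page(struct page *vmpage)
{
        if (!PagePrivate(vmpage))
                return NULL;
        return (struct cl_page *)vmpage->private;
}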