fs: use mapping_set_error instead of opencoded set_bit
[cascardo/linux.git] / fs / afs / write.c
1 /* handling of writes to regular files and writing back to the server
2  *
3  * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
#include <linux/backing-dev.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"
18
/* forward declaration: afs_write_begin() calls this to flush a conflicting
 * writeback record before the function itself is defined further down */
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
					   struct page *primary_page);
21
/*
 * flag a page as dirty so that it will need writing back
 * - thin wrapper: the result of __set_page_dirty_nobuffers() is passed
 *   straight back to the caller
 */
int afs_set_page_dirty(struct page *page)
{
	int dirtied;

	_enter("");

	dirtied = __set_page_dirty_nobuffers(page);
	return dirtied;
}
30
31 /*
32  * unlink a writeback record because its usage has reached zero
33  * - must be called with the wb->vnode->writeback_lock held
34  */
35 static void afs_unlink_writeback(struct afs_writeback *wb)
36 {
37         struct afs_writeback *front;
38         struct afs_vnode *vnode = wb->vnode;
39
40         list_del_init(&wb->link);
41         if (!list_empty(&vnode->writebacks)) {
42                 /* if an fsync rises to the front of the queue then wake it
43                  * up */
44                 front = list_entry(vnode->writebacks.next,
45                                    struct afs_writeback, link);
46                 if (front->state == AFS_WBACK_SYNCING) {
47                         _debug("wake up sync");
48                         front->state = AFS_WBACK_COMPLETE;
49                         wake_up(&front->waitq);
50                 }
51         }
52 }
53
54 /*
55  * free a writeback record
56  */
57 static void afs_free_writeback(struct afs_writeback *wb)
58 {
59         _enter("");
60         key_put(wb->key);
61         kfree(wb);
62 }
63
64 /*
65  * dispose of a reference to a writeback record
66  */
67 void afs_put_writeback(struct afs_writeback *wb)
68 {
69         struct afs_vnode *vnode = wb->vnode;
70
71         _enter("{%d}", wb->usage);
72
73         spin_lock(&vnode->writeback_lock);
74         if (--wb->usage == 0)
75                 afs_unlink_writeback(wb);
76         else
77                 wb = NULL;
78         spin_unlock(&vnode->writeback_lock);
79         if (wb)
80                 afs_free_writeback(wb);
81 }
82
83 /*
84  * partly or wholly fill a page that's under preparation for writing
85  */
86 static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
87                          loff_t pos, struct page *page)
88 {
89         loff_t i_size;
90         int ret;
91         int len;
92
93         _enter(",,%llu", (unsigned long long)pos);
94
95         i_size = i_size_read(&vnode->vfs_inode);
96         if (pos + PAGE_SIZE > i_size)
97                 len = i_size - pos;
98         else
99                 len = PAGE_SIZE;
100
101         ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
102         if (ret < 0) {
103                 if (ret == -ENOENT) {
104                         _debug("got NOENT from server"
105                                " - marking file deleted and stale");
106                         set_bit(AFS_VNODE_DELETED, &vnode->flags);
107                         ret = -ESTALE;
108                 }
109         }
110
111         _leave(" = %d", ret);
112         return ret;
113 }
114
115 /*
116  * prepare to perform part of a write to a page
117  */
118 int afs_write_begin(struct file *file, struct address_space *mapping,
119                     loff_t pos, unsigned len, unsigned flags,
120                     struct page **pagep, void **fsdata)
121 {
122         struct afs_writeback *candidate, *wb;
123         struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
124         struct page *page;
125         struct key *key = file->private_data;
126         unsigned from = pos & (PAGE_SIZE - 1);
127         unsigned to = from + len;
128         pgoff_t index = pos >> PAGE_SHIFT;
129         int ret;
130
131         _enter("{%x:%u},{%lx},%u,%u",
132                vnode->fid.vid, vnode->fid.vnode, index, from, to);
133
134         candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
135         if (!candidate)
136                 return -ENOMEM;
137         candidate->vnode = vnode;
138         candidate->first = candidate->last = index;
139         candidate->offset_first = from;
140         candidate->to_last = to;
141         INIT_LIST_HEAD(&candidate->link);
142         candidate->usage = 1;
143         candidate->state = AFS_WBACK_PENDING;
144         init_waitqueue_head(&candidate->waitq);
145
146         page = grab_cache_page_write_begin(mapping, index, flags);
147         if (!page) {
148                 kfree(candidate);
149                 return -ENOMEM;
150         }
151         *pagep = page;
152         /* page won't leak in error case: it eventually gets cleaned off LRU */
153
154         if (!PageUptodate(page) && len != PAGE_SIZE) {
155                 ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
156                 if (ret < 0) {
157                         kfree(candidate);
158                         _leave(" = %d [prep]", ret);
159                         return ret;
160                 }
161                 SetPageUptodate(page);
162         }
163
164 try_again:
165         spin_lock(&vnode->writeback_lock);
166
167         /* see if this page is already pending a writeback under a suitable key
168          * - if so we can just join onto that one */
169         wb = (struct afs_writeback *) page_private(page);
170         if (wb) {
171                 if (wb->key == key && wb->state == AFS_WBACK_PENDING)
172                         goto subsume_in_current_wb;
173                 goto flush_conflicting_wb;
174         }
175
176         if (index > 0) {
177                 /* see if we can find an already pending writeback that we can
178                  * append this page to */
179                 list_for_each_entry(wb, &vnode->writebacks, link) {
180                         if (wb->last == index - 1 && wb->key == key &&
181                             wb->state == AFS_WBACK_PENDING)
182                                 goto append_to_previous_wb;
183                 }
184         }
185
186         list_add_tail(&candidate->link, &vnode->writebacks);
187         candidate->key = key_get(key);
188         spin_unlock(&vnode->writeback_lock);
189         SetPagePrivate(page);
190         set_page_private(page, (unsigned long) candidate);
191         _leave(" = 0 [new]");
192         return 0;
193
194 subsume_in_current_wb:
195         _debug("subsume");
196         ASSERTRANGE(wb->first, <=, index, <=, wb->last);
197         if (index == wb->first && from < wb->offset_first)
198                 wb->offset_first = from;
199         if (index == wb->last && to > wb->to_last)
200                 wb->to_last = to;
201         spin_unlock(&vnode->writeback_lock);
202         kfree(candidate);
203         _leave(" = 0 [sub]");
204         return 0;
205
206 append_to_previous_wb:
207         _debug("append into %lx-%lx", wb->first, wb->last);
208         wb->usage++;
209         wb->last++;
210         wb->to_last = to;
211         spin_unlock(&vnode->writeback_lock);
212         SetPagePrivate(page);
213         set_page_private(page, (unsigned long) wb);
214         kfree(candidate);
215         _leave(" = 0 [app]");
216         return 0;
217
218         /* the page is currently bound to another context, so if it's dirty we
219          * need to flush it before we can use the new context */
220 flush_conflicting_wb:
221         _debug("flush conflict");
222         if (wb->state == AFS_WBACK_PENDING)
223                 wb->state = AFS_WBACK_CONFLICTING;
224         spin_unlock(&vnode->writeback_lock);
225         if (PageDirty(page)) {
226                 ret = afs_write_back_from_locked_page(wb, page);
227                 if (ret < 0) {
228                         afs_put_writeback(candidate);
229                         _leave(" = %d", ret);
230                         return ret;
231                 }
232         }
233
234         /* the page holds a ref on the writeback record */
235         afs_put_writeback(wb);
236         set_page_private(page, 0);
237         ClearPagePrivate(page);
238         goto try_again;
239 }
240
241 /*
242  * finalise part of a write to a page
243  */
244 int afs_write_end(struct file *file, struct address_space *mapping,
245                   loff_t pos, unsigned len, unsigned copied,
246                   struct page *page, void *fsdata)
247 {
248         struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
249         loff_t i_size, maybe_i_size;
250
251         _enter("{%x:%u},{%lx}",
252                vnode->fid.vid, vnode->fid.vnode, page->index);
253
254         maybe_i_size = pos + copied;
255
256         i_size = i_size_read(&vnode->vfs_inode);
257         if (maybe_i_size > i_size) {
258                 spin_lock(&vnode->writeback_lock);
259                 i_size = i_size_read(&vnode->vfs_inode);
260                 if (maybe_i_size > i_size)
261                         i_size_write(&vnode->vfs_inode, maybe_i_size);
262                 spin_unlock(&vnode->writeback_lock);
263         }
264
265         set_page_dirty(page);
266         if (PageDirty(page))
267                 _debug("dirtied");
268         unlock_page(page);
269         put_page(page);
270
271         return copied;
272 }
273
274 /*
275  * kill all the pages in the given range
276  */
277 static void afs_kill_pages(struct afs_vnode *vnode, bool error,
278                            pgoff_t first, pgoff_t last)
279 {
280         struct pagevec pv;
281         unsigned count, loop;
282
283         _enter("{%x:%u},%lx-%lx",
284                vnode->fid.vid, vnode->fid.vnode, first, last);
285
286         pagevec_init(&pv, 0);
287
288         do {
289                 _debug("kill %lx-%lx", first, last);
290
291                 count = last - first + 1;
292                 if (count > PAGEVEC_SIZE)
293                         count = PAGEVEC_SIZE;
294                 pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
295                                               first, count, pv.pages);
296                 ASSERTCMP(pv.nr, ==, count);
297
298                 for (loop = 0; loop < count; loop++) {
299                         ClearPageUptodate(pv.pages[loop]);
300                         if (error)
301                                 SetPageError(pv.pages[loop]);
302                         end_page_writeback(pv.pages[loop]);
303                 }
304
305                 __pagevec_release(&pv);
306         } while (first < last);
307
308         _leave("");
309 }
310
311 /*
312  * synchronously write back the locked page and any subsequent non-locked dirty
313  * pages also covered by the same writeback record
314  */
315 static int afs_write_back_from_locked_page(struct afs_writeback *wb,
316                                            struct page *primary_page)
317 {
318         struct page *pages[8], *page;
319         unsigned long count;
320         unsigned n, offset, to;
321         pgoff_t start, first, last;
322         int loop, ret;
323
324         _enter(",%lx", primary_page->index);
325
326         count = 1;
327         if (!clear_page_dirty_for_io(primary_page))
328                 BUG();
329         if (test_set_page_writeback(primary_page))
330                 BUG();
331
332         /* find all consecutive lockable dirty pages, stopping when we find a
333          * page that is not immediately lockable, is not dirty or is missing,
334          * or we reach the end of the range */
335         start = primary_page->index;
336         if (start >= wb->last)
337                 goto no_more;
338         start++;
339         do {
340                 _debug("more %lx [%lx]", start, count);
341                 n = wb->last - start + 1;
342                 if (n > ARRAY_SIZE(pages))
343                         n = ARRAY_SIZE(pages);
344                 n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
345                                           start, n, pages);
346                 _debug("fgpc %u", n);
347                 if (n == 0)
348                         goto no_more;
349                 if (pages[0]->index != start) {
350                         do {
351                                 put_page(pages[--n]);
352                         } while (n > 0);
353                         goto no_more;
354                 }
355
356                 for (loop = 0; loop < n; loop++) {
357                         page = pages[loop];
358                         if (page->index > wb->last)
359                                 break;
360                         if (!trylock_page(page))
361                                 break;
362                         if (!PageDirty(page) ||
363                             page_private(page) != (unsigned long) wb) {
364                                 unlock_page(page);
365                                 break;
366                         }
367                         if (!clear_page_dirty_for_io(page))
368                                 BUG();
369                         if (test_set_page_writeback(page))
370                                 BUG();
371                         unlock_page(page);
372                         put_page(page);
373                 }
374                 count += loop;
375                 if (loop < n) {
376                         for (; loop < n; loop++)
377                                 put_page(pages[loop]);
378                         goto no_more;
379                 }
380
381                 start += loop;
382         } while (start <= wb->last && count < 65536);
383
384 no_more:
385         /* we now have a contiguous set of dirty pages, each with writeback set
386          * and the dirty mark cleared; the first page is locked and must remain
387          * so, all the rest are unlocked */
388         first = primary_page->index;
389         last = first + count - 1;
390
391         offset = (first == wb->first) ? wb->offset_first : 0;
392         to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
393
394         _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
395
396         ret = afs_vnode_store_data(wb, first, last, offset, to);
397         if (ret < 0) {
398                 switch (ret) {
399                 case -EDQUOT:
400                 case -ENOSPC:
401                         mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC);
402                         break;
403                 case -EROFS:
404                 case -EIO:
405                 case -EREMOTEIO:
406                 case -EFBIG:
407                 case -ENOENT:
408                 case -ENOMEDIUM:
409                 case -ENXIO:
410                         afs_kill_pages(wb->vnode, true, first, last);
411                         mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO);
412                         break;
413                 case -EACCES:
414                 case -EPERM:
415                 case -ENOKEY:
416                 case -EKEYEXPIRED:
417                 case -EKEYREJECTED:
418                 case -EKEYREVOKED:
419                         afs_kill_pages(wb->vnode, false, first, last);
420                         break;
421                 default:
422                         break;
423                 }
424         } else {
425                 ret = count;
426         }
427
428         _leave(" = %d", ret);
429         return ret;
430 }
431
432 /*
433  * write a page back to the server
434  * - the caller locked the page for us
435  */
436 int afs_writepage(struct page *page, struct writeback_control *wbc)
437 {
438         struct afs_writeback *wb;
439         int ret;
440
441         _enter("{%lx},", page->index);
442
443         wb = (struct afs_writeback *) page_private(page);
444         ASSERT(wb != NULL);
445
446         ret = afs_write_back_from_locked_page(wb, page);
447         unlock_page(page);
448         if (ret < 0) {
449                 _leave(" = %d", ret);
450                 return 0;
451         }
452
453         wbc->nr_to_write -= ret;
454
455         _leave(" = 0");
456         return 0;
457 }
458
459 /*
460  * write a region of pages back to the server
461  */
462 static int afs_writepages_region(struct address_space *mapping,
463                                  struct writeback_control *wbc,
464                                  pgoff_t index, pgoff_t end, pgoff_t *_next)
465 {
466         struct afs_writeback *wb;
467         struct page *page;
468         int ret, n;
469
470         _enter(",,%lx,%lx,", index, end);
471
472         do {
473                 n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
474                                        1, &page);
475                 if (!n)
476                         break;
477
478                 _debug("wback %lx", page->index);
479
480                 if (page->index > end) {
481                         *_next = index;
482                         put_page(page);
483                         _leave(" = 0 [%lx]", *_next);
484                         return 0;
485                 }
486
487                 /* at this point we hold neither mapping->tree_lock nor lock on
488                  * the page itself: the page may be truncated or invalidated
489                  * (changing page->mapping to NULL), or even swizzled back from
490                  * swapper_space to tmpfs file mapping
491                  */
492                 lock_page(page);
493
494                 if (page->mapping != mapping) {
495                         unlock_page(page);
496                         put_page(page);
497                         continue;
498                 }
499
500                 if (wbc->sync_mode != WB_SYNC_NONE)
501                         wait_on_page_writeback(page);
502
503                 if (PageWriteback(page) || !PageDirty(page)) {
504                         unlock_page(page);
505                         continue;
506                 }
507
508                 wb = (struct afs_writeback *) page_private(page);
509                 ASSERT(wb != NULL);
510
511                 spin_lock(&wb->vnode->writeback_lock);
512                 wb->state = AFS_WBACK_WRITING;
513                 spin_unlock(&wb->vnode->writeback_lock);
514
515                 ret = afs_write_back_from_locked_page(wb, page);
516                 unlock_page(page);
517                 put_page(page);
518                 if (ret < 0) {
519                         _leave(" = %d", ret);
520                         return ret;
521                 }
522
523                 wbc->nr_to_write -= ret;
524
525                 cond_resched();
526         } while (index < end && wbc->nr_to_write > 0);
527
528         *_next = index;
529         _leave(" = 0 [%lx]", *_next);
530         return 0;
531 }
532
533 /*
534  * write some of the pending data back to the server
535  */
536 int afs_writepages(struct address_space *mapping,
537                    struct writeback_control *wbc)
538 {
539         pgoff_t start, end, next;
540         int ret;
541
542         _enter("");
543
544         if (wbc->range_cyclic) {
545                 start = mapping->writeback_index;
546                 end = -1;
547                 ret = afs_writepages_region(mapping, wbc, start, end, &next);
548                 if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
549                         ret = afs_writepages_region(mapping, wbc, 0, start,
550                                                     &next);
551                 mapping->writeback_index = next;
552         } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
553                 end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT);
554                 ret = afs_writepages_region(mapping, wbc, 0, end, &next);
555                 if (wbc->nr_to_write > 0)
556                         mapping->writeback_index = next;
557         } else {
558                 start = wbc->range_start >> PAGE_SHIFT;
559                 end = wbc->range_end >> PAGE_SHIFT;
560                 ret = afs_writepages_region(mapping, wbc, start, end, &next);
561         }
562
563         _leave(" = %d", ret);
564         return ret;
565 }
566
567 /*
568  * completion of write to server
569  */
570 void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
571 {
572         struct afs_writeback *wb = call->wb;
573         struct pagevec pv;
574         unsigned count, loop;
575         pgoff_t first = call->first, last = call->last;
576         bool free_wb;
577
578         _enter("{%x:%u},{%lx-%lx}",
579                vnode->fid.vid, vnode->fid.vnode, first, last);
580
581         ASSERT(wb != NULL);
582
583         pagevec_init(&pv, 0);
584
585         do {
586                 _debug("done %lx-%lx", first, last);
587
588                 count = last - first + 1;
589                 if (count > PAGEVEC_SIZE)
590                         count = PAGEVEC_SIZE;
591                 pv.nr = find_get_pages_contig(call->mapping, first, count,
592                                               pv.pages);
593                 ASSERTCMP(pv.nr, ==, count);
594
595                 spin_lock(&vnode->writeback_lock);
596                 for (loop = 0; loop < count; loop++) {
597                         struct page *page = pv.pages[loop];
598                         end_page_writeback(page);
599                         if (page_private(page) == (unsigned long) wb) {
600                                 set_page_private(page, 0);
601                                 ClearPagePrivate(page);
602                                 wb->usage--;
603                         }
604                 }
605                 free_wb = false;
606                 if (wb->usage == 0) {
607                         afs_unlink_writeback(wb);
608                         free_wb = true;
609                 }
610                 spin_unlock(&vnode->writeback_lock);
611                 first += count;
612                 if (free_wb) {
613                         afs_free_writeback(wb);
614                         wb = NULL;
615                 }
616
617                 __pagevec_release(&pv);
618         } while (first <= last);
619
620         _leave("");
621 }
622
623 /*
624  * write to an AFS file
625  */
626 ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from)
627 {
628         struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp));
629         ssize_t result;
630         size_t count = iov_iter_count(from);
631
632         _enter("{%x.%u},{%zu},",
633                vnode->fid.vid, vnode->fid.vnode, count);
634
635         if (IS_SWAPFILE(&vnode->vfs_inode)) {
636                 printk(KERN_INFO
637                        "AFS: Attempt to write to active swap file!\n");
638                 return -EBUSY;
639         }
640
641         if (!count)
642                 return 0;
643
644         result = generic_file_write_iter(iocb, from);
645
646         _leave(" = %zd", result);
647         return result;
648 }
649
650 /*
651  * flush the vnode to the fileserver
652  */
653 int afs_writeback_all(struct afs_vnode *vnode)
654 {
655         struct address_space *mapping = vnode->vfs_inode.i_mapping;
656         struct writeback_control wbc = {
657                 .sync_mode      = WB_SYNC_ALL,
658                 .nr_to_write    = LONG_MAX,
659                 .range_cyclic   = 1,
660         };
661         int ret;
662
663         _enter("");
664
665         ret = mapping->a_ops->writepages(mapping, &wbc);
666         __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
667
668         _leave(" = %d", ret);
669         return ret;
670 }
671
672 /*
673  * flush any dirty pages for this process, and check for write errors.
674  * - the return status from this call provides a reliable indication of
675  *   whether any write errors occurred for this process.
676  */
677 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
678 {
679         struct inode *inode = file_inode(file);
680         struct afs_writeback *wb, *xwb;
681         struct afs_vnode *vnode = AFS_FS_I(inode);
682         int ret;
683
684         _enter("{%x:%u},{n=%pD},%d",
685                vnode->fid.vid, vnode->fid.vnode, file,
686                datasync);
687
688         ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
689         if (ret)
690                 return ret;
691         inode_lock(inode);
692
693         /* use a writeback record as a marker in the queue - when this reaches
694          * the front of the queue, all the outstanding writes are either
695          * completed or rejected */
696         wb = kzalloc(sizeof(*wb), GFP_KERNEL);
697         if (!wb) {
698                 ret = -ENOMEM;
699                 goto out;
700         }
701         wb->vnode = vnode;
702         wb->first = 0;
703         wb->last = -1;
704         wb->offset_first = 0;
705         wb->to_last = PAGE_SIZE;
706         wb->usage = 1;
707         wb->state = AFS_WBACK_SYNCING;
708         init_waitqueue_head(&wb->waitq);
709
710         spin_lock(&vnode->writeback_lock);
711         list_for_each_entry(xwb, &vnode->writebacks, link) {
712                 if (xwb->state == AFS_WBACK_PENDING)
713                         xwb->state = AFS_WBACK_CONFLICTING;
714         }
715         list_add_tail(&wb->link, &vnode->writebacks);
716         spin_unlock(&vnode->writeback_lock);
717
718         /* push all the outstanding writebacks to the server */
719         ret = afs_writeback_all(vnode);
720         if (ret < 0) {
721                 afs_put_writeback(wb);
722                 _leave(" = %d [wb]", ret);
723                 goto out;
724         }
725
726         /* wait for the preceding writes to actually complete */
727         ret = wait_event_interruptible(wb->waitq,
728                                        wb->state == AFS_WBACK_COMPLETE ||
729                                        vnode->writebacks.next == &wb->link);
730         afs_put_writeback(wb);
731         _leave(" = %d", ret);
732 out:
733         inode_unlock(inode);
734         return ret;
735 }
736
737 /*
738  * notification that a previously read-only page is about to become writable
739  * - if it returns an error, the caller will deliver a bus error signal
740  */
741 int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
742 {
743         struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host);
744
745         _enter("{{%x:%u}},{%lx}",
746                vnode->fid.vid, vnode->fid.vnode, page->index);
747
748         /* wait for the page to be written to the cache before we allow it to
749          * be modified */
750 #ifdef CONFIG_AFS_FSCACHE
751         fscache_wait_on_page_write(vnode->cache, page);
752 #endif
753
754         _leave(" = 0");
755         return 0;
756 }