1 /*
2  * fs/f2fs/checkpoint.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/bio.h>
13 #include <linux/mpage.h>
14 #include <linux/writeback.h>
15 #include <linux/blkdev.h>
16 #include <linux/f2fs_fs.h>
17 #include <linux/pagevec.h>
18 #include <linux/swap.h>
19
20 #include "f2fs.h"
21 #include "node.h"
22 #include "segment.h"
23 #include <trace/events/f2fs.h>
24
25 static struct kmem_cache *ino_entry_slab;
26 static struct kmem_cache *inode_entry_slab;
27
28 /*
29  * We guarantee no failure on the returned page.
30  */
31 struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
32 {
33         struct address_space *mapping = META_MAPPING(sbi);
34         struct page *page = NULL;
35 repeat:
36         page = grab_cache_page(mapping, index);
37         if (!page) {
38                 cond_resched();
39                 goto repeat;
40         }
41         f2fs_wait_on_page_writeback(page, META);
42         SetPageUptodate(page);
43         return page;
44 }
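
/*
 * Note: unlike get_meta_page() below, grab_meta_page() never issues a
 * read; it only pins a meta page in the page cache and marks it
 * uptodate, so callers are expected to overwrite the whole block
 * (as do_checkpoint() and write_orphan_inodes() do).
 */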
45
46 /*
47  * We guarantee no failure on the returned page.
48  */
49 struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
50 {
51         struct address_space *mapping = META_MAPPING(sbi);
52         struct page *page;
53 repeat:
54         page = grab_cache_page(mapping, index);
55         if (!page) {
56                 cond_resched();
57                 goto repeat;
58         }
59         if (PageUptodate(page))
60                 goto out;
61
62         if (f2fs_submit_page_bio(sbi, page, index,
63                                 READ_SYNC | REQ_META | REQ_PRIO))
64                 goto repeat;
65
66         lock_page(page);
67         if (unlikely(page->mapping != mapping)) {
68                 f2fs_put_page(page, 1);
69                 goto repeat;
70         }
71 out:
72         return page;
73 }
74
75 static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
76 {
77         switch (type) {
78         case META_NAT:
79                 return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
80         case META_SIT:
81                 return SIT_BLK_CNT(sbi);
82         case META_SSA:
83         case META_CP:
84                 return 0;
85         default:
86                 BUG();
87         }
88 }
89
90 /*
91  * Readahead CP/NAT/SIT/SSA pages
92  */
93 int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
94 {
95         block_t prev_blk_addr = 0;
96         struct page *page;
97         int blkno = start;
98         int max_blks = get_max_meta_blks(sbi, type);
99
100         struct f2fs_io_info fio = {
101                 .type = META,
102                 .rw = READ_SYNC | REQ_META | REQ_PRIO
103         };
104
105         for (; nrpages-- > 0; blkno++) {
106                 block_t blk_addr;
107
108                 switch (type) {
109                 case META_NAT:
110                         /* get nat block addr */
111                         if (unlikely(blkno >= max_blks))
112                                 blkno = 0;
113                         blk_addr = current_nat_addr(sbi,
114                                         blkno * NAT_ENTRY_PER_BLOCK);
115                         break;
116                 case META_SIT:
117                         /* get sit block addr */
118                         if (unlikely(blkno >= max_blks))
119                                 goto out;
120                         blk_addr = current_sit_addr(sbi,
121                                         blkno * SIT_ENTRY_PER_BLOCK);
122                         if (blkno != start && prev_blk_addr + 1 != blk_addr)
123                                 goto out;
124                         prev_blk_addr = blk_addr;
125                         break;
126                 case META_SSA:
127                 case META_CP:
128                         /* get ssa/cp block addr */
129                         blk_addr = blkno;
130                         break;
131                 default:
132                         BUG();
133                 }
134
135                 page = grab_cache_page(META_MAPPING(sbi), blk_addr);
136                 if (!page)
137                         continue;
138                 if (PageUptodate(page)) {
139                         f2fs_put_page(page, 1);
140                         continue;
141                 }
142
143                 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
144                 f2fs_put_page(page, 0);
145         }
146 out:
147         f2fs_submit_merged_bio(sbi, META, READ);
148         return blkno - start;
149 }
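
/*
 * Example usage within this file: recover_orphan_inodes() readaheads the
 * orphan blocks of the current CP pack with
 *	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
 * before walking them one by one with get_meta_page().
 */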
150
151 static int f2fs_write_meta_page(struct page *page,
152                                 struct writeback_control *wbc)
153 {
154         struct inode *inode = page->mapping->host;
155         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
156
157         trace_f2fs_writepage(page, META);
158
159         if (unlikely(sbi->por_doing))
160                 goto redirty_out;
161         if (wbc->for_reclaim)
162                 goto redirty_out;
163         if (unlikely(f2fs_cp_error(sbi)))
164                 goto redirty_out;
165
166         f2fs_wait_on_page_writeback(page, META);
167         write_meta_page(sbi, page);
168         dec_page_count(sbi, F2FS_DIRTY_META);
169         unlock_page(page);
170         return 0;
171
172 redirty_out:
173         redirty_page_for_writepage(wbc, page);
174         return AOP_WRITEPAGE_ACTIVATE;
175 }
176
177 static int f2fs_write_meta_pages(struct address_space *mapping,
178                                 struct writeback_control *wbc)
179 {
180         struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
181         long diff, written;
182
183         trace_f2fs_writepages(mapping->host, wbc, META);
184
185         /* collect a number of dirty meta pages and write them together */
186         if (wbc->for_kupdate ||
187                 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
188                 goto skip_write;
189
190         /* if mounting failed, skip writing node pages */
191         mutex_lock(&sbi->cp_mutex);
192         diff = nr_pages_to_write(sbi, META, wbc);
193         written = sync_meta_pages(sbi, META, wbc->nr_to_write);
194         mutex_unlock(&sbi->cp_mutex);
195         wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
196         return 0;
197
198 skip_write:
199         wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
200         return 0;
201 }
202
203 long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
204                                                 long nr_to_write)
205 {
206         struct address_space *mapping = META_MAPPING(sbi);
207         pgoff_t index = 0, end = LONG_MAX;
208         struct pagevec pvec;
209         long nwritten = 0;
210         struct writeback_control wbc = {
211                 .for_reclaim = 0,
212         };
213
214         pagevec_init(&pvec, 0);
215
216         while (index <= end) {
217                 int i, nr_pages;
218                 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
219                                 PAGECACHE_TAG_DIRTY,
220                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
221                 if (unlikely(nr_pages == 0))
222                         break;
223
224                 for (i = 0; i < nr_pages; i++) {
225                         struct page *page = pvec.pages[i];
226
227                         lock_page(page);
228
229                         if (unlikely(page->mapping != mapping)) {
230 continue_unlock:
231                                 unlock_page(page);
232                                 continue;
233                         }
234                         if (!PageDirty(page)) {
235                                 /* someone wrote it for us */
236                                 goto continue_unlock;
237                         }
238
239                         if (!clear_page_dirty_for_io(page))
240                                 goto continue_unlock;
241
242                         if (f2fs_write_meta_page(page, &wbc)) {
243                                 unlock_page(page);
244                                 break;
245                         }
246                         nwritten++;
247                         if (unlikely(nwritten >= nr_to_write))
248                                 break;
249                 }
250                 pagevec_release(&pvec);
251                 cond_resched();
252         }
253
254         if (nwritten)
255                 f2fs_submit_merged_bio(sbi, type, WRITE);
256
257         return nwritten;
258 }
259
260 static int f2fs_set_meta_page_dirty(struct page *page)
261 {
262         struct address_space *mapping = page->mapping;
263         struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
264
265         trace_f2fs_set_page_dirty(page, META);
266
267         SetPageUptodate(page);
268         if (!PageDirty(page)) {
269                 __set_page_dirty_nobuffers(page);
270                 inc_page_count(sbi, F2FS_DIRTY_META);
271                 return 1;
272         }
273         return 0;
274 }
275
276 const struct address_space_operations f2fs_meta_aops = {
277         .writepage      = f2fs_write_meta_page,
278         .writepages     = f2fs_write_meta_pages,
279         .set_page_dirty = f2fs_set_meta_page_dirty,
280 };
281
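/*
 * Per-type inode number tracking: each ino set (e.g. ORPHAN_INO,
 * APPEND_INO, UPDATE_INO) is kept in a radix tree for fast lookup plus a
 * list for ordered traversal, both protected by ino_lock[type].
 * The public wrappers below map onto these two helpers, e.g.:
 *	add_orphan_inode(sbi, ino)    -> __add_ino_entry(sbi, ino, ORPHAN_INO)
 *	remove_orphan_inode(sbi, ino) -> __remove_ino_entry(sbi, ino, ORPHAN_INO)
 */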
282 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
283 {
284         struct ino_entry *e;
285 retry:
286         spin_lock(&sbi->ino_lock[type]);
287
288         e = radix_tree_lookup(&sbi->ino_root[type], ino);
289         if (!e) {
290                 e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
291                 if (!e) {
292                         spin_unlock(&sbi->ino_lock[type]);
293                         goto retry;
294                 }
295                 if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
296                         spin_unlock(&sbi->ino_lock[type]);
297                         kmem_cache_free(ino_entry_slab, e);
298                         goto retry;
299                 }
300                 memset(e, 0, sizeof(struct ino_entry));
301                 e->ino = ino;
302
303                 list_add_tail(&e->list, &sbi->ino_list[type]);
304         }
305         spin_unlock(&sbi->ino_lock[type]);
306 }
307
308 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
309 {
310         struct ino_entry *e;
311
312         spin_lock(&sbi->ino_lock[type]);
313         e = radix_tree_lookup(&sbi->ino_root[type], ino);
314         if (e) {
315                 list_del(&e->list);
316                 radix_tree_delete(&sbi->ino_root[type], ino);
317                 if (type == ORPHAN_INO)
318                         sbi->n_orphans--;
319                 spin_unlock(&sbi->ino_lock[type]);
320                 kmem_cache_free(ino_entry_slab, e);
321                 return;
322         }
323         spin_unlock(&sbi->ino_lock[type]);
324 }
325
326 void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
327 {
328         /* add new dirty ino entry into list */
329         __add_ino_entry(sbi, ino, type);
330 }
331
332 void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
333 {
334         /* remove dirty ino entry from list */
335         __remove_ino_entry(sbi, ino, type);
336 }
337
338 /* mode should be APPEND_INO or UPDATE_INO */
339 bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
340 {
341         struct ino_entry *e;
342         spin_lock(&sbi->ino_lock[mode]);
343         e = radix_tree_lookup(&sbi->ino_root[mode], ino);
344         spin_unlock(&sbi->ino_lock[mode]);
345         return e ? true : false;
346 }
347
348 void release_dirty_inode(struct f2fs_sb_info *sbi)
349 {
350         struct ino_entry *e, *tmp;
351         int i;
352
353         for (i = APPEND_INO; i <= UPDATE_INO; i++) {
354                 spin_lock(&sbi->ino_lock[i]);
355                 list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
356                         list_del(&e->list);
357                         radix_tree_delete(&sbi->ino_root[i], e->ino);
358                         kmem_cache_free(ino_entry_slab, e);
359                 }
360                 spin_unlock(&sbi->ino_lock[i]);
361         }
362 }
363
364 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
365 {
366         int err = 0;
367
368         spin_lock(&sbi->ino_lock[ORPHAN_INO]);
369         if (unlikely(sbi->n_orphans >= sbi->max_orphans))
370                 err = -ENOSPC;
371         else
372                 sbi->n_orphans++;
373         spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
374
375         return err;
376 }
377
378 void release_orphan_inode(struct f2fs_sb_info *sbi)
379 {
380         spin_lock(&sbi->ino_lock[ORPHAN_INO]);
381         f2fs_bug_on(sbi->n_orphans == 0);
382         sbi->n_orphans--;
383         spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
384 }
385
386 void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
387 {
388         /* add new orphan ino entry into list */
389         __add_ino_entry(sbi, ino, ORPHAN_INO);
390 }
391
392 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
393 {
394         /* remove orphan entry from orphan list */
395         __remove_ino_entry(sbi, ino, ORPHAN_INO);
396 }
397
398 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
399 {
400         struct inode *inode = f2fs_iget(sbi->sb, ino);
401         f2fs_bug_on(IS_ERR(inode));
402         clear_nlink(inode);
403
404         /* truncate all the data during iput */
405         iput(inode);
406 }
407
408 void recover_orphan_inodes(struct f2fs_sb_info *sbi)
409 {
410         block_t start_blk, orphan_blkaddr, i, j;
411
412         if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
413                 return;
414
415         sbi->por_doing = true;
416
417         start_blk = __start_cp_addr(sbi) + 1 +
418                 le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
419         orphan_blkaddr = __start_sum_addr(sbi) - 1;
420
421         ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
422
423         for (i = 0; i < orphan_blkaddr; i++) {
424                 struct page *page = get_meta_page(sbi, start_blk + i);
425                 struct f2fs_orphan_block *orphan_blk;
426
427                 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
428                 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
429                         nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
430                         recover_orphan_inode(sbi, ino);
431                 }
432                 f2fs_put_page(page, 1);
433         }
434         /* clear Orphan Flag */
435         clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
436         sbi->por_doing = false;
437         return;
438 }
439
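/*
 * Each on-disk orphan block written below carries a small header in
 * addition to its ino[] array: blk_addr is the 1-based index of this
 * block within the orphan area, blk_count is the total number of orphan
 * blocks in the CP pack, and entry_count is the number of valid inos in
 * this particular block (up to F2FS_ORPHANS_PER_BLOCK).
 */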
440 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
441 {
442         struct list_head *head;
443         struct f2fs_orphan_block *orphan_blk = NULL;
444         unsigned int nentries = 0;
445         unsigned short index;
446         unsigned short orphan_blocks =
447                         (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
448         struct page *page = NULL;
449         struct ino_entry *orphan = NULL;
450
451         for (index = 0; index < orphan_blocks; index++)
452                 grab_meta_page(sbi, start_blk + index);
453
454         index = 1;
455         spin_lock(&sbi->ino_lock[ORPHAN_INO]);
456         head = &sbi->ino_list[ORPHAN_INO];
457
458         /* loop over each orphan inode entry and write it to the journal block */
459         list_for_each_entry(orphan, head, list) {
460                 if (!page) {
461                         page = find_get_page(META_MAPPING(sbi), start_blk++);
462                         f2fs_bug_on(!page);
463                         orphan_blk =
464                                 (struct f2fs_orphan_block *)page_address(page);
465                         memset(orphan_blk, 0, sizeof(*orphan_blk));
466                         f2fs_put_page(page, 0);
467                 }
468
469                 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
470
471                 if (nentries == F2FS_ORPHANS_PER_BLOCK) {
472                         /*
473                          * when an orphan block is full of 1020 entries,
474                          * we need to flush the current orphan block and
475                          * bring another one into memory
476                          */
477                         orphan_blk->blk_addr = cpu_to_le16(index);
478                         orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
479                         orphan_blk->entry_count = cpu_to_le32(nentries);
480                         set_page_dirty(page);
481                         f2fs_put_page(page, 1);
482                         index++;
483                         nentries = 0;
484                         page = NULL;
485                 }
486         }
487
488         if (page) {
489                 orphan_blk->blk_addr = cpu_to_le16(index);
490                 orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
491                 orphan_blk->entry_count = cpu_to_le32(nentries);
492                 set_page_dirty(page);
493                 f2fs_put_page(page, 1);
494         }
495
496         spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
497 }
498
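/*
 * A CP pack is considered valid only if both its first and last blocks
 * carry a correct CRC and the same checkpoint version; the idea is that
 * a matching version in both copies indicates the pack was written out
 * completely before the last crash.
 */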
499 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
500                                 block_t cp_addr, unsigned long long *version)
501 {
502         struct page *cp_page_1, *cp_page_2 = NULL;
503         unsigned long blk_size = sbi->blocksize;
504         struct f2fs_checkpoint *cp_block;
505         unsigned long long cur_version = 0, pre_version = 0;
506         size_t crc_offset;
507         __u32 crc = 0;
508
509         /* Read the 1st cp block in this CP pack */
510         cp_page_1 = get_meta_page(sbi, cp_addr);
511
512         /* get the version number */
513         cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
514         crc_offset = le32_to_cpu(cp_block->checksum_offset);
515         if (crc_offset >= blk_size)
516                 goto invalid_cp1;
517
518         crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
519         if (!f2fs_crc_valid(crc, cp_block, crc_offset))
520                 goto invalid_cp1;
521
522         pre_version = cur_cp_version(cp_block);
523
524         /* Read the 2nd cp block in this CP pack */
525         cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
526         cp_page_2 = get_meta_page(sbi, cp_addr);
527
528         cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
529         crc_offset = le32_to_cpu(cp_block->checksum_offset);
530         if (crc_offset >= blk_size)
531                 goto invalid_cp2;
532
533         crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
534         if (!f2fs_crc_valid(crc, cp_block, crc_offset))
535                 goto invalid_cp2;
536
537         cur_version = cur_cp_version(cp_block);
538
539         if (cur_version == pre_version) {
540                 *version = cur_version;
541                 f2fs_put_page(cp_page_2, 1);
542                 return cp_page_1;
543         }
544 invalid_cp2:
545         f2fs_put_page(cp_page_2, 1);
546 invalid_cp1:
547         f2fs_put_page(cp_page_1, 1);
548         return NULL;
549 }
550
551 int get_valid_checkpoint(struct f2fs_sb_info *sbi)
552 {
553         struct f2fs_checkpoint *cp_block;
554         struct f2fs_super_block *fsb = sbi->raw_super;
555         struct page *cp1, *cp2, *cur_page;
556         unsigned long blk_size = sbi->blocksize;
557         unsigned long long cp1_version = 0, cp2_version = 0;
558         unsigned long long cp_start_blk_no;
559         unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
560         block_t cp_blk_no;
561         int i;
562
563         sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
564         if (!sbi->ckpt)
565                 return -ENOMEM;
566         /*
567          * Finding the valid cp block involves reading both
568          * sets (cp pack 1 and cp pack 2)
569          */
570         cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
571         cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
572
573         /* The second checkpoint pack should start at the next segment */
574         cp_start_blk_no += ((unsigned long long)1) <<
575                                 le32_to_cpu(fsb->log_blocks_per_seg);
576         cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
577
578         if (cp1 && cp2) {
579                 if (ver_after(cp2_version, cp1_version))
580                         cur_page = cp2;
581                 else
582                         cur_page = cp1;
583         } else if (cp1) {
584                 cur_page = cp1;
585         } else if (cp2) {
586                 cur_page = cp2;
587         } else {
588                 goto fail_no_cp;
589         }
590
591         cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
592         memcpy(sbi->ckpt, cp_block, blk_size);
593
594         if (cp_blks <= 1)
595                 goto done;
596
597         cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
598         if (cur_page == cp2)
599                 cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
600
601         for (i = 1; i < cp_blks; i++) {
602                 void *sit_bitmap_ptr;
603                 unsigned char *ckpt = (unsigned char *)sbi->ckpt;
604
605                 cur_page = get_meta_page(sbi, cp_blk_no + i);
606                 sit_bitmap_ptr = page_address(cur_page);
607                 memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
608                 f2fs_put_page(cur_page, 1);
609         }
610 done:
611         f2fs_put_page(cp1, 1);
612         f2fs_put_page(cp2, 1);
613         return 0;
614
615 fail_no_cp:
616         kfree(sbi->ckpt);
617         return -EINVAL;
618 }
619
620 static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
621 {
622         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
623
624         if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
625                 return -EEXIST;
626
627         set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
628         F2FS_I(inode)->dirty_dir = new;
629         list_add_tail(&new->list, &sbi->dir_inode_list);
630         stat_inc_dirty_dir(sbi);
631         return 0;
632 }
633
634 void set_dirty_dir_page(struct inode *inode, struct page *page)
635 {
636         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
637         struct dir_inode_entry *new;
638         int ret = 0;
639
640         if (!S_ISDIR(inode->i_mode))
641                 return;
642
643         new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
644         new->inode = inode;
645         INIT_LIST_HEAD(&new->list);
646
647         spin_lock(&sbi->dir_inode_lock);
648         ret = __add_dirty_inode(inode, new);
649         inode_inc_dirty_dents(inode);
650         SetPagePrivate(page);
651         spin_unlock(&sbi->dir_inode_lock);
652
653         if (ret)
654                 kmem_cache_free(inode_entry_slab, new);
655 }
656
657 void add_dirty_dir_inode(struct inode *inode)
658 {
659         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
660         struct dir_inode_entry *new =
661                         f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
662         int ret = 0;
663
664         new->inode = inode;
665         INIT_LIST_HEAD(&new->list);
666
667         spin_lock(&sbi->dir_inode_lock);
668         ret = __add_dirty_inode(inode, new);
669         spin_unlock(&sbi->dir_inode_lock);
670
671         if (ret)
672                 kmem_cache_free(inode_entry_slab, new);
673 }
674
675 void remove_dirty_dir_inode(struct inode *inode)
676 {
677         struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
678         struct dir_inode_entry *entry;
679
680         if (!S_ISDIR(inode->i_mode))
681                 return;
682
683         spin_lock(&sbi->dir_inode_lock);
684         if (get_dirty_dents(inode) ||
685                         !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
686                 spin_unlock(&sbi->dir_inode_lock);
687                 return;
688         }
689
690         entry = F2FS_I(inode)->dirty_dir;
691         list_del(&entry->list);
692         F2FS_I(inode)->dirty_dir = NULL;
693         clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
694         stat_dec_dirty_dir(sbi);
695         spin_unlock(&sbi->dir_inode_lock);
696         kmem_cache_free(inode_entry_slab, entry);
697
698         /* Only from the recovery routine */
699         if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
700                 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
701                 iput(inode);
702         }
703 }
704
705 void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
706 {
707         struct list_head *head;
708         struct dir_inode_entry *entry;
709         struct inode *inode;
710 retry:
711         spin_lock(&sbi->dir_inode_lock);
712
713         head = &sbi->dir_inode_list;
714         if (list_empty(head)) {
715                 spin_unlock(&sbi->dir_inode_lock);
716                 return;
717         }
718         entry = list_entry(head->next, struct dir_inode_entry, list);
719         inode = igrab(entry->inode);
720         spin_unlock(&sbi->dir_inode_lock);
721         if (inode) {
722                 filemap_fdatawrite(inode->i_mapping);
723                 iput(inode);
724         } else {
725                 /*
726                  * We should submit the bio, since several dentry pages
727                  * may still be under writeback in the inode being freed.
728                  */
729                 f2fs_submit_merged_bio(sbi, DATA, WRITE);
730         }
731         goto retry;
732 }
733
734 /*
735  * Freeze all the FS-operations for checkpoint.
736  */
737 static int block_operations(struct f2fs_sb_info *sbi)
738 {
739         struct writeback_control wbc = {
740                 .sync_mode = WB_SYNC_ALL,
741                 .nr_to_write = LONG_MAX,
742                 .for_reclaim = 0,
743         };
744         struct blk_plug plug;
745         int err = 0;
746
747         blk_start_plug(&plug);
748
749 retry_flush_dents:
750         f2fs_lock_all(sbi);
751         /* write all the dirty dentry pages */
752         if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
753                 f2fs_unlock_all(sbi);
754                 sync_dirty_dir_inodes(sbi);
755                 if (unlikely(f2fs_cp_error(sbi))) {
756                         err = -EIO;
757                         goto out;
758                 }
759                 goto retry_flush_dents;
760         }
761
762         /*
763          * POR: we should ensure that there are no dirty node pages
764          * until the nat/sit flush finishes.
765          */
766 retry_flush_nodes:
767         down_write(&sbi->node_write);
768
769         if (get_pages(sbi, F2FS_DIRTY_NODES)) {
770                 up_write(&sbi->node_write);
771                 sync_node_pages(sbi, 0, &wbc);
772                 if (unlikely(f2fs_cp_error(sbi))) {
773                         f2fs_unlock_all(sbi);
774                         err = -EIO;
775                         goto out;
776                 }
777                 goto retry_flush_nodes;
778         }
779 out:
780         blk_finish_plug(&plug);
781         return err;
782 }
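
/*
 * Note that both retry loops above drop the lock they just took before
 * syncing, so new dirty dentry or node pages may appear in the meantime;
 * the loops simply repeat until a pass finds nothing left to write while
 * the lock is held.
 */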
783
784 static void unblock_operations(struct f2fs_sb_info *sbi)
785 {
786         up_write(&sbi->node_write);
787         f2fs_unlock_all(sbi);
788 }
789
790 static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
791 {
792         DEFINE_WAIT(wait);
793
794         for (;;) {
795                 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
796
797                 if (!get_pages(sbi, F2FS_WRITEBACK))
798                         break;
799
800                 io_schedule();
801         }
802         finish_wait(&sbi->cp_wait, &wait);
803 }
804
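/*
 * do_checkpoint() lays out the CP pack in the meta area as follows
 * (matching the offsets computed for cp_pack_start_sum and
 * cp_pack_total_block_count below):
 *
 *	block 0			checkpoint block (header + SIT/NAT bitmaps)
 *	blocks 1..cp_payload	extra checkpoint payload
 *	orphan blocks		only if sbi->n_orphans != 0
 *	data summaries		data_sum_blocks blocks
 *	node summaries		NR_CURSEG_NODE_TYPE blocks, umount only
 *	last block		second copy of the checkpoint block
 */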
805 static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
806 {
807         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
808         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
809         nid_t last_nid = 0;
810         block_t start_blk;
811         struct page *cp_page;
812         unsigned int data_sum_blocks, orphan_blocks;
813         __u32 crc32 = 0;
814         void *kaddr;
815         int i;
816         int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
817
818         /*
819          * This avoids conducting wrong roll-forward operations and uses
820          * meta pages, so it should be called prior to sync_meta_pages below.
821          */
822         discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
823
824         /* Flush all the NAT/SIT pages */
825         while (get_pages(sbi, F2FS_DIRTY_META)) {
826                 sync_meta_pages(sbi, META, LONG_MAX);
827                 if (unlikely(f2fs_cp_error(sbi)))
828                         return;
829         }
830
831         next_free_nid(sbi, &last_nid);
832
833         /*
834          * modify checkpoint
835          * version number is already updated
836          */
837         ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
838         ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
839         ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
840         for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
841                 ckpt->cur_node_segno[i] =
842                         cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
843                 ckpt->cur_node_blkoff[i] =
844                         cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
845                 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
846                                 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
847         }
848         for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
849                 ckpt->cur_data_segno[i] =
850                         cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
851                 ckpt->cur_data_blkoff[i] =
852                         cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
853                 ckpt->alloc_type[i + CURSEG_HOT_DATA] =
854                                 curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
855         }
856
857         ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
858         ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
859         ckpt->next_free_nid = cpu_to_le32(last_nid);
860
861         /* 2 cp + n data seg summary + orphan inode blocks */
862         data_sum_blocks = npages_for_summary_flush(sbi);
863         if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
864                 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
865         else
866                 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
867
868         orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
869         ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
870                         orphan_blocks);
871
872         if (is_umount) {
873                 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
874                 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
875                                 cp_payload_blks + data_sum_blocks +
876                                 orphan_blocks + NR_CURSEG_NODE_TYPE);
877         } else {
878                 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
879                 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
880                                 cp_payload_blks + data_sum_blocks +
881                                 orphan_blocks);
882         }
883
884         if (sbi->n_orphans)
885                 set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
886         else
887                 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
888
889         /* update SIT/NAT bitmap */
890         get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
891         get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
892
893         crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
894         *((__le32 *)((unsigned char *)ckpt +
895                                 le32_to_cpu(ckpt->checksum_offset)))
896                                 = cpu_to_le32(crc32);
897
898         start_blk = __start_cp_addr(sbi);
899
900         /* write out checkpoint buffer at block 0 */
901         cp_page = grab_meta_page(sbi, start_blk++);
902         kaddr = page_address(cp_page);
903         memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
904         set_page_dirty(cp_page);
905         f2fs_put_page(cp_page, 1);
906
907         for (i = 1; i < 1 + cp_payload_blks; i++) {
908                 cp_page = grab_meta_page(sbi, start_blk++);
909                 kaddr = page_address(cp_page);
910                 memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
911                                 (1 << sbi->log_blocksize));
912                 set_page_dirty(cp_page);
913                 f2fs_put_page(cp_page, 1);
914         }
915
916         if (sbi->n_orphans) {
917                 write_orphan_inodes(sbi, start_blk);
918                 start_blk += orphan_blocks;
919         }
920
921         write_data_summaries(sbi, start_blk);
922         start_blk += data_sum_blocks;
923         if (is_umount) {
924                 write_node_summaries(sbi, start_blk);
925                 start_blk += NR_CURSEG_NODE_TYPE;
926         }
927
928         /* write out checkpoint block */
929         cp_page = grab_meta_page(sbi, start_blk);
930         kaddr = page_address(cp_page);
931         memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
932         set_page_dirty(cp_page);
933         f2fs_put_page(cp_page, 1);
934
935         /* wait for previously submitted node/meta pages to finish writeback */
936         wait_on_all_pages_writeback(sbi);
937
938         if (unlikely(f2fs_cp_error(sbi)))
939                 return;
940
941         filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
942         filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
943
944         /* update user_block_counts */
945         sbi->last_valid_block_count = sbi->total_valid_block_count;
946         sbi->alloc_valid_block_count = 0;
947
948         /* Here, we have only one bio containing the CP pack */
949         sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
950
951         release_dirty_inode(sbi);
952
953         if (unlikely(f2fs_cp_error(sbi)))
954                 return;
955
956         clear_prefree_segments(sbi);
957         F2FS_RESET_SB_DIRT(sbi);
958 }
959
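/*
 * High-level checkpoint sequence (see write_checkpoint() below):
 * freeze fs operations, flush outstanding DATA/NODE/META bios, bump the
 * checkpoint version, flush cached NAT/SIT entries, write the CP pack
 * via do_checkpoint(), then unfreeze.
 */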
960 /*
961  * We guarantee that this checkpoint procedure will not fail.
962  */
963 void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
964 {
965         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
966         unsigned long long ckpt_ver;
967
968         trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
969
970         mutex_lock(&sbi->cp_mutex);
971
972         if (!sbi->s_dirty)
973                 goto out;
974         if (unlikely(f2fs_cp_error(sbi)))
975                 goto out;
976         if (block_operations(sbi))
977                 goto out;
978
979         trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
980
981         f2fs_submit_merged_bio(sbi, DATA, WRITE);
982         f2fs_submit_merged_bio(sbi, NODE, WRITE);
983         f2fs_submit_merged_bio(sbi, META, WRITE);
984
985         /*
986          * update the checkpoint pack index:
987          * increase the version number so that
988          * SIT entries and seg summaries are written in the correct place
989          */
990         ckpt_ver = cur_cp_version(ckpt);
991         ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
992
993         /* write cached NAT/SIT entries to NAT/SIT area */
994         flush_nat_entries(sbi);
995         flush_sit_entries(sbi);
996
997         /* write out the checkpoint pack */
998         do_checkpoint(sbi, is_umount);
999
1000         unblock_operations(sbi);
1001         stat_inc_cp_count(sbi->stat_info);
1002 out:
1003         mutex_unlock(&sbi->cp_mutex);
1004         trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
1005 }
1006
1007 void init_ino_entry_info(struct f2fs_sb_info *sbi)
1008 {
1009         int i;
1010
1011         for (i = 0; i < MAX_INO_ENTRY; i++) {
1012                 INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
1013                 spin_lock_init(&sbi->ino_lock[i]);
1014                 INIT_LIST_HEAD(&sbi->ino_list[i]);
1015         }
1016
1017         /*
1018          * Considering 512 blocks in a segment, 8 blocks are needed for the
1019          * cp pack and log segment summaries. The remaining blocks are used
1020          * to keep orphan entries. With the limitation of one reserved
1021          * segment for the cp pack, we can have at most 1020 * 504 entries.
1022          */
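        /*
         * Worked example, assuming F2FS_CP_PACKS == 2 and NR_CURSEG_TYPE == 6:
         * 512 - 2 - 6 = 504 blocks remain for orphan entries, giving
         * 504 * 1020 = 514,080 possible orphan inodes per CP pack.
         */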
1023         sbi->n_orphans = 0;
1024         sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1025                         NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
1026 }
1027
1028 int __init create_checkpoint_caches(void)
1029 {
1030         ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
1031                         sizeof(struct ino_entry));
1032         if (!ino_entry_slab)
1033                 return -ENOMEM;
1034         inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
1035                         sizeof(struct dir_inode_entry));
1036         if (!inode_entry_slab) {
1037                 kmem_cache_destroy(ino_entry_slab);
1038                 return -ENOMEM;
1039         }
1040         return 0;
1041 }
1042
1043 void destroy_checkpoint_caches(void)
1044 {
1045         kmem_cache_destroy(ino_entry_slab);
1046         kmem_cache_destroy(inode_entry_slab);
1047 }