Merge branch 'linux-3.17' of git://anongit.freedesktop.org/git/nouveau/linux-2.6...
[cascardo/linux.git] / fs / f2fs / node.c
index 9dfb9a0..d3d90d2 100644 (file)
@@ -25,6 +25,7 @@
 
 static struct kmem_cache *nat_entry_slab;
 static struct kmem_cache *free_nid_slab;
+static struct kmem_cache *nat_entry_set_slab;
 
 bool available_free_memory(struct f2fs_sb_info *sbi, int type)
 {
@@ -42,6 +43,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
                mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12;
                res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2);
        } else if (type == DIRTY_DENTS) {
+               if (sbi->sb->s_bdi->dirty_exceeded)
+                       return false;
                mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
                res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1);
        }
@@ -88,12 +91,8 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
 
        /* get current nat block page with lock */
        src_page = get_meta_page(sbi, src_off);
-
-       /* Dirty src_page means that it is already the new target NAT page. */
-       if (PageDirty(src_page))
-               return src_page;
-
        dst_page = grab_meta_page(sbi, dst_off);
+       f2fs_bug_on(PageDirty(src_page));
 
        src_addr = page_address(src_page);
        dst_addr = page_address(dst_page);
@@ -843,7 +842,7 @@ void remove_inode_page(struct inode *inode)
        truncate_node(&dn);
 }
 
-struct page *new_inode_page(struct inode *inode, const struct qstr *name)
+struct page *new_inode_page(struct inode *inode)
 {
        struct dnode_of_data dn;
 
@@ -1232,12 +1231,12 @@ static int f2fs_write_node_page(struct page *page,
        if (wbc->for_reclaim)
                goto redirty_out;
 
-       mutex_lock(&sbi->node_write);
+       down_read(&sbi->node_write);
        set_page_writeback(page);
        write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
        set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
        dec_page_count(sbi, F2FS_DIRTY_NODES);
-       mutex_unlock(&sbi->node_write);
+       up_read(&sbi->node_write);
        unlock_page(page);
        return 0;
 
@@ -1550,7 +1549,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page,
        clear_node_page_dirty(page);
 }
 
-static void recover_inline_xattr(struct inode *inode, struct page *page)
+void recover_inline_xattr(struct inode *inode, struct page *page)
 {
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        void *src_addr, *dst_addr;
@@ -1589,8 +1588,6 @@ bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr)
        nid_t new_xnid = nid_of_node(page);
        struct node_info ni;
 
-       recover_inline_xattr(inode, page);
-
        if (!f2fs_has_xattr_block(ofs_of_node(page)))
                return false;
 
@@ -1742,7 +1739,90 @@ skip:
        return err;
 }
 
-static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
+static struct nat_entry_set *grab_nat_entry_set(void)
+{
+       struct nat_entry_set *nes =
+                       f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC);
+
+       nes->entry_cnt = 0;
+       INIT_LIST_HEAD(&nes->set_list);
+       INIT_LIST_HEAD(&nes->entry_list);
+       return nes;
+}
+
+static void release_nat_entry_set(struct nat_entry_set *nes,
+                                               struct f2fs_nm_info *nm_i)
+{
+       f2fs_bug_on(!list_empty(&nes->entry_list));
+
+       nm_i->dirty_nat_cnt -= nes->entry_cnt;
+       list_del(&nes->set_list);
+       kmem_cache_free(nat_entry_set_slab, nes);
+}
+
+static void adjust_nat_entry_set(struct nat_entry_set *nes,
+                                               struct list_head *head)
+{
+       struct nat_entry_set *next = nes;
+
+       if (list_is_last(&nes->set_list, head))
+               return;
+
+       list_for_each_entry_continue(next, head, set_list)
+               if (nes->entry_cnt <= next->entry_cnt)
+                       break;
+
+       list_move_tail(&nes->set_list, &next->set_list);
+}
+
+static void add_nat_entry(struct nat_entry *ne, struct list_head *head)
+{
+       struct nat_entry_set *nes;
+       nid_t start_nid = START_NID(ne->ni.nid);
+
+       list_for_each_entry(nes, head, set_list) {
+               if (nes->start_nid == start_nid) {
+                       list_move_tail(&ne->list, &nes->entry_list);
+                       nes->entry_cnt++;
+                       adjust_nat_entry_set(nes, head);
+                       return;
+               }
+       }
+
+       nes = grab_nat_entry_set();
+
+       nes->start_nid = start_nid;
+       list_move_tail(&ne->list, &nes->entry_list);
+       nes->entry_cnt++;
+       list_add(&nes->set_list, head);
+}
+
+static void merge_nats_in_set(struct f2fs_sb_info *sbi)
+{
+       struct f2fs_nm_info *nm_i = NM_I(sbi);
+       struct list_head *dirty_list = &nm_i->dirty_nat_entries;
+       struct list_head *set_list = &nm_i->nat_entry_set;
+       struct nat_entry *ne, *tmp;
+
+       write_lock(&nm_i->nat_tree_lock);
+       list_for_each_entry_safe(ne, tmp, dirty_list, list) {
+               if (nat_get_blkaddr(ne) == NEW_ADDR)
+                       continue;
+               add_nat_entry(ne, set_list);
+               nm_i->dirty_nat_cnt++;
+       }
+       write_unlock(&nm_i->nat_tree_lock);
+}
+
+static bool __has_cursum_space(struct f2fs_summary_block *sum, int size)
+{
+       if (nats_in_cursum(sum) + size <= NAT_JOURNAL_ENTRIES)
+               return true;
+       else
+               return false;
+}
+
+static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1750,12 +1830,6 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
        int i;
 
        mutex_lock(&curseg->curseg_mutex);
-
-       if (nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES) {
-               mutex_unlock(&curseg->curseg_mutex);
-               return false;
-       }
-
        for (i = 0; i < nats_in_cursum(sum); i++) {
                struct nat_entry *ne;
                struct f2fs_nat_entry raw_ne;
@@ -1765,23 +1839,21 @@ static bool flush_nats_in_journal(struct f2fs_sb_info *sbi)
 retry:
                write_lock(&nm_i->nat_tree_lock);
                ne = __lookup_nat_cache(nm_i, nid);
-               if (ne) {
-                       __set_nat_cache_dirty(nm_i, ne);
-                       write_unlock(&nm_i->nat_tree_lock);
-                       continue;
-               }
+               if (ne)
+                       goto found;
+
                ne = grab_nat_entry(nm_i, nid);
                if (!ne) {
                        write_unlock(&nm_i->nat_tree_lock);
                        goto retry;
                }
                node_info_from_raw_nat(&ne->ni, &raw_ne);
+found:
                __set_nat_cache_dirty(nm_i, ne);
                write_unlock(&nm_i->nat_tree_lock);
        }
        update_nats_in_cursum(sum, -i);
        mutex_unlock(&curseg->curseg_mutex);
-       return true;
 }
 
 /*
@@ -1792,80 +1864,91 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
        struct f2fs_summary_block *sum = curseg->sum_blk;
-       struct nat_entry *ne, *cur;
-       struct page *page = NULL;
-       struct f2fs_nat_block *nat_blk = NULL;
-       nid_t start_nid = 0, end_nid = 0;
-       bool flushed;
+       struct nat_entry_set *nes, *tmp;
+       struct list_head *head = &nm_i->nat_entry_set;
+       bool to_journal = true;
 
-       flushed = flush_nats_in_journal(sbi);
-
-       if (!flushed)
-               mutex_lock(&curseg->curseg_mutex);
-
-       /* 1) flush dirty nat caches */
-       list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) {
-               nid_t nid;
-               struct f2fs_nat_entry raw_ne;
-               int offset = -1;
-
-               if (nat_get_blkaddr(ne) == NEW_ADDR)
-                       continue;
+       /* merge nat entries of dirty list to nat entry set temporarily */
+       merge_nats_in_set(sbi);
 
-               nid = nat_get_nid(ne);
+       /*
+        * if there is not enough space in the journal to store the dirty
+        * nat entries, remove all entries from the journal and merge them
+        * into the nat entry set.
+        */
+       if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt)) {
+               remove_nats_in_journal(sbi);
 
-               if (flushed)
-                       goto to_nat_page;
+               /*
+                * merge nat entries of dirty list to nat entry set temporarily
+                */
+               merge_nats_in_set(sbi);
+       }
 
-               /* if there is room for nat enries in curseg->sumpage */
-               offset = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 1);
-               if (offset >= 0) {
-                       raw_ne = nat_in_journal(sum, offset);
-                       goto flush_now;
-               }
-to_nat_page:
-               if (!page || (start_nid > nid || nid > end_nid)) {
-                       if (page) {
-                               f2fs_put_page(page, 1);
-                               page = NULL;
-                       }
-                       start_nid = START_NID(nid);
-                       end_nid = start_nid + NAT_ENTRY_PER_BLOCK - 1;
+       if (!nm_i->dirty_nat_cnt)
+               return;
 
-                       /*
-                        * get nat block with dirty flag, increased reference
-                        * count, mapped and lock
-                        */
+       /*
+        * there are two steps to flush nat entries:
+        * #1, flush nat entries to journal in current hot data summary block.
+        * #2, flush nat entries to nat page.
+        */
+       list_for_each_entry_safe(nes, tmp, head, set_list) {
+               struct f2fs_nat_block *nat_blk;
+               struct nat_entry *ne, *cur;
+               struct page *page;
+               nid_t start_nid = nes->start_nid;
+
+               if (to_journal && !__has_cursum_space(sum, nes->entry_cnt))
+                       to_journal = false;
+
+               if (to_journal) {
+                       mutex_lock(&curseg->curseg_mutex);
+               } else {
                        page = get_next_nat_page(sbi, start_nid);
                        nat_blk = page_address(page);
+                       f2fs_bug_on(!nat_blk);
                }
 
-               f2fs_bug_on(!nat_blk);
-               raw_ne = nat_blk->entries[nid - start_nid];
-flush_now:
-               raw_nat_from_node_info(&raw_ne, &ne->ni);
-
-               if (offset < 0) {
-                       nat_blk->entries[nid - start_nid] = raw_ne;
-               } else {
-                       nat_in_journal(sum, offset) = raw_ne;
-                       nid_in_journal(sum, offset) = cpu_to_le32(nid);
-               }
+               /* flush dirty nats in nat entry set */
+               list_for_each_entry_safe(ne, cur, &nes->entry_list, list) {
+                       struct f2fs_nat_entry *raw_ne;
+                       nid_t nid = nat_get_nid(ne);
+                       int offset;
+
+                       if (to_journal) {
+                               offset = lookup_journal_in_cursum(sum,
+                                                       NAT_JOURNAL, nid, 1);
+                               f2fs_bug_on(offset < 0);
+                               raw_ne = &nat_in_journal(sum, offset);
+                               nid_in_journal(sum, offset) = cpu_to_le32(nid);
+                       } else {
+                               raw_ne = &nat_blk->entries[nid - start_nid];
+                       }
+                       raw_nat_from_node_info(raw_ne, &ne->ni);
 
-               if (nat_get_blkaddr(ne) == NULL_ADDR &&
+                       if (nat_get_blkaddr(ne) == NULL_ADDR &&
                                add_free_nid(sbi, nid, false) <= 0) {
-                       write_lock(&nm_i->nat_tree_lock);
-                       __del_from_nat_cache(nm_i, ne);
-                       write_unlock(&nm_i->nat_tree_lock);
-               } else {
-                       write_lock(&nm_i->nat_tree_lock);
-                       __clear_nat_cache_dirty(nm_i, ne);
-                       write_unlock(&nm_i->nat_tree_lock);
+                               write_lock(&nm_i->nat_tree_lock);
+                               __del_from_nat_cache(nm_i, ne);
+                               write_unlock(&nm_i->nat_tree_lock);
+                       } else {
+                               write_lock(&nm_i->nat_tree_lock);
+                               __clear_nat_cache_dirty(nm_i, ne);
+                               write_unlock(&nm_i->nat_tree_lock);
+                       }
                }
+
+               if (to_journal)
+                       mutex_unlock(&curseg->curseg_mutex);
+               else
+                       f2fs_put_page(page, 1);
+
+               release_nat_entry_set(nes, nm_i);
        }
-       if (!flushed)
-               mutex_unlock(&curseg->curseg_mutex);
-       f2fs_put_page(page, 1);
+
+       f2fs_bug_on(!list_empty(head));
+       f2fs_bug_on(nm_i->dirty_nat_cnt);
 }
 
 static int init_node_manager(struct f2fs_sb_info *sbi)
@@ -1894,6 +1977,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
        INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC);
        INIT_LIST_HEAD(&nm_i->nat_entries);
        INIT_LIST_HEAD(&nm_i->dirty_nat_entries);
+       INIT_LIST_HEAD(&nm_i->nat_entry_set);
 
        mutex_init(&nm_i->build_lock);
        spin_lock_init(&nm_i->free_nid_list_lock);
@@ -1974,19 +2058,30 @@ int __init create_node_manager_caches(void)
        nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
                        sizeof(struct nat_entry));
        if (!nat_entry_slab)
-               return -ENOMEM;
+               goto fail;
 
        free_nid_slab = f2fs_kmem_cache_create("free_nid",
                        sizeof(struct free_nid));
-       if (!free_nid_slab) {
-               kmem_cache_destroy(nat_entry_slab);
-               return -ENOMEM;
-       }
+       if (!free_nid_slab)
+               goto destory_nat_entry;
+
+       nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
+                       sizeof(struct nat_entry_set));
+       if (!nat_entry_set_slab)
+               goto destory_free_nid;
        return 0;
+
+destory_free_nid:
+       kmem_cache_destroy(free_nid_slab);
+destory_nat_entry:
+       kmem_cache_destroy(nat_entry_slab);
+fail:
+       return -ENOMEM;
 }
 
 void destroy_node_manager_caches(void)
 {
+       kmem_cache_destroy(nat_entry_set_slab);
        kmem_cache_destroy(free_nid_slab);
        kmem_cache_destroy(nat_entry_slab);
 }