Merge tag 'for-f2fs-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk...
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d186769..b2fa4b6 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -52,6 +52,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
                mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
                                                        PAGE_SHIFT;
                res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
+               if (excess_cached_nats(sbi))
+                       res = false;
+               if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD)
+                       res = false;
        } else if (type == DIRTY_DENTS) {
                if (sbi->sb->s_bdi->wb.dirty_exceeded)
                        return false;
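
For context, the two added bail-outs cap the NAT entry cache both relatively (a share of the reclaimable-RAM budget, as in the existing quarter-of-threshold test) and absolutely (a hard entry count). A compact sketch of that shape, using hypothetical names (demo_can_cache_more, DEMO_HARD_LIMIT) rather than the f2fs ones:

#include <linux/mm.h>	/* PAGE_SHIFT */

#define DEMO_HARD_LIMIT	100000	/* hypothetical absolute cap on cached entries */

/*
 * Sketch only: allow further caching while the cache stays under a quarter
 * of the RAM budget *and* under a hard entry-count limit.
 */
static bool demo_can_cache_more(unsigned long nr_entries,
				unsigned long entry_size,
				unsigned long avail_ram_pages,
				unsigned int ram_thresh_pct)
{
	unsigned long used_pages = (nr_entries * entry_size) >> PAGE_SHIFT;
	bool ok = used_pages < ((avail_ram_pages * ram_thresh_pct / 100) >> 2);

	if (nr_entries > DEMO_HARD_LIMIT)
		ok = false;
	return ok;
}
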
@@ -202,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
        struct nat_entry *e;
        bool need = false;
 
-       down_read(&nm_i->nat_tree_lock);
+       percpu_down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e) {
                if (!get_nat_flag(e, IS_CHECKPOINTED) &&
                                !get_nat_flag(e, HAS_FSYNCED_INODE))
                        need = true;
        }
-       up_read(&nm_i->nat_tree_lock);
+       percpu_up_read(&nm_i->nat_tree_lock);
        return need;
 }
 
@@ -219,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
        struct nat_entry *e;
        bool is_cp = true;
 
-       down_read(&nm_i->nat_tree_lock);
+       percpu_down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e && !get_nat_flag(e, IS_CHECKPOINTED))
                is_cp = false;
-       up_read(&nm_i->nat_tree_lock);
+       percpu_up_read(&nm_i->nat_tree_lock);
        return is_cp;
 }
 
@@ -233,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
        struct nat_entry *e;
        bool need_update = true;
 
-       down_read(&nm_i->nat_tree_lock);
+       percpu_down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, ino);
        if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
                        (get_nat_flag(e, IS_CHECKPOINTED) ||
                         get_nat_flag(e, HAS_FSYNCED_INODE)))
                need_update = false;
-       up_read(&nm_i->nat_tree_lock);
+       percpu_up_read(&nm_i->nat_tree_lock);
        return need_update;
 }
 
@@ -280,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct nat_entry *e;
 
-       down_write(&nm_i->nat_tree_lock);
+       percpu_down_write(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, ni->nid);
        if (!e) {
                e = grab_nat_entry(nm_i, ni->nid);
@@ -330,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
                        set_nat_flag(e, HAS_FSYNCED_INODE, true);
                set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
        }
-       up_write(&nm_i->nat_tree_lock);
+       percpu_up_write(&nm_i->nat_tree_lock);
 }
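
The nat_tree_lock conversion above (and in the hunks that follow) swaps a plain rw_semaphore for a percpu_rw_semaphore: readers become cheap, mostly per-CPU operations, writers pay a heavier synchronization cost, and the lock now needs explicit init/free because per-CPU state is allocated. A minimal lifecycle sketch, assuming <linux/percpu-rwsem.h> and a hypothetical demo_cache in place of f2fs_nm_info:

#include <linux/errno.h>
#include <linux/percpu-rwsem.h>

/* Hypothetical structure protected the same way nm_i->nat_tree_lock now is. */
struct demo_cache {
	struct percpu_rw_semaphore lock;
	unsigned long nr_entries;
};

static int demo_cache_init(struct demo_cache *c)
{
	/* percpu_init_rwsem() allocates per-CPU state and can fail */
	if (percpu_init_rwsem(&c->lock))
		return -ENOMEM;
	c->nr_entries = 0;
	return 0;
}

static unsigned long demo_cache_count(struct demo_cache *c)
{
	unsigned long nr;

	percpu_down_read(&c->lock);	/* cheap on the reader side */
	nr = c->nr_entries;
	percpu_up_read(&c->lock);
	return nr;
}

static void demo_cache_grow(struct demo_cache *c)
{
	percpu_down_write(&c->lock);	/* expensive: waits for all readers */
	c->nr_entries++;
	percpu_up_write(&c->lock);
}

static void demo_cache_destroy(struct demo_cache *c)
{
	percpu_free_rwsem(&c->lock);	/* pairs with percpu_init_rwsem() */
}

One side effect visible in the next hunk: this generation of the API offers no writer-side trylock, so try_to_free_nats() trades its down_write_trylock() fast-fail for an unconditional percpu_down_write().
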
 
 int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -338,8 +342,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        int nr = nr_shrink;
 
-       if (!down_write_trylock(&nm_i->nat_tree_lock))
-               return 0;
+       percpu_down_write(&nm_i->nat_tree_lock);
 
        while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
                struct nat_entry *ne;
@@ -348,7 +351,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
                __del_from_nat_cache(nm_i, ne);
                nr_shrink--;
        }
-       up_write(&nm_i->nat_tree_lock);
+       percpu_up_write(&nm_i->nat_tree_lock);
        return nr - nr_shrink;
 }
 
@@ -370,13 +373,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
        ni->nid = nid;
 
        /* Check nat cache */
-       down_read(&nm_i->nat_tree_lock);
+       percpu_down_read(&nm_i->nat_tree_lock);
        e = __lookup_nat_cache(nm_i, nid);
        if (e) {
                ni->ino = nat_get_ino(e);
                ni->blk_addr = nat_get_blkaddr(e);
                ni->version = nat_get_version(e);
-               up_read(&nm_i->nat_tree_lock);
+               percpu_up_read(&nm_i->nat_tree_lock);
                return;
        }
 
@@ -400,11 +403,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
        node_info_from_raw_nat(ni, &ne);
        f2fs_put_page(page, 1);
 cache:
-       up_read(&nm_i->nat_tree_lock);
+       percpu_up_read(&nm_i->nat_tree_lock);
        /* cache nat entry */
-       down_write(&nm_i->nat_tree_lock);
+       percpu_down_write(&nm_i->nat_tree_lock);
        cache_nat_entry(sbi, nid, &ne);
-       up_write(&nm_i->nat_tree_lock);
+       percpu_up_write(&nm_i->nat_tree_lock);
 }
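
In get_node_info() above, the read side of nat_tree_lock is dropped before the write side is taken to cache the freshly read entry; the lock cannot be upgraded, so another task may slip in and insert the same entry first. The insert therefore has to re-check under the write lock, the shape sketched below with hypothetical demo_lookup()/demo_insert() helpers standing in for __lookup_nat_cache()/cache_nat_entry():

#include <linux/percpu-rwsem.h>

bool demo_lookup(int key);		/* hypothetical cache helpers */
void demo_insert(int key, int val);

static void demo_cache_fill(struct percpu_rw_semaphore *lock, int key, int val)
{
	percpu_down_write(lock);
	/* the entry may have been cached by a racing task in the meantime */
	if (!demo_lookup(key))
		demo_insert(key, val);
	percpu_up_write(lock);
}
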
 
 /*
@@ -646,6 +649,7 @@ release_out:
        if (err == -ENOENT) {
                dn->cur_level = i;
                dn->max_level = level;
+               dn->ofs_in_node = offset[level];
        }
        return err;
 }
@@ -670,8 +674,7 @@ static void truncate_node(struct dnode_of_data *dn)
        if (dn->nid == dn->inode->i_ino) {
                remove_orphan_inode(sbi, dn->nid);
                dec_valid_inode_count(sbi);
-       } else {
-               sync_inode_page(dn);
+               f2fs_inode_synced(dn->inode);
        }
 invalidate:
        clear_node_page_dirty(dn->node_page);
@@ -953,7 +956,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
        if (IS_ERR(npage))
                return PTR_ERR(npage);
 
-       F2FS_I(inode)->i_xattr_nid = 0;
+       f2fs_i_xnid_write(inode, 0);
 
        /* need to do checkpoint during fsync */
        F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
@@ -1019,7 +1022,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
        struct page *page;
        int err;
 
-       if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
+       if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
                return ERR_PTR(-EPERM);
 
        page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
@@ -1042,21 +1045,16 @@ struct page *new_node_page(struct dnode_of_data *dn,
        f2fs_wait_on_page_writeback(page, NODE, true);
        fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
        set_cold_node(dn->inode, page);
-       SetPageUptodate(page);
+       if (!PageUptodate(page))
+               SetPageUptodate(page);
        if (set_page_dirty(page))
                dn->node_changed = true;
 
        if (f2fs_has_xattr_block(ofs))
-               F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
+               f2fs_i_xnid_write(dn->inode, dn->nid);
 
-       dn->node_page = page;
-       if (ipage)
-               update_inode(dn->inode, ipage);
-       else
-               sync_inode_page(dn);
        if (ofs == 0)
                inc_valid_inode_count(sbi);
-
        return page;
 
 fail:
@@ -1083,6 +1081,9 @@ static int read_node_page(struct page *page, int op_flags)
                .encrypted_page = NULL,
        };
 
+       if (PageUptodate(page))
+               return LOCKED_PAGE;
+
        get_node_info(sbi, page->index, &ni);
 
        if (unlikely(ni.blk_addr == NULL_ADDR)) {
@@ -1090,9 +1091,6 @@ static int read_node_page(struct page *page, int op_flags)
                return -ENOENT;
        }
 
-       if (PageUptodate(page))
-               return LOCKED_PAGE;
-
        fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
        return f2fs_submit_page_bio(&fio);
 }
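
Moving the PageUptodate() test to the top of read_node_page() lets an already valid page return LOCKED_PAGE without paying for the NAT lookup in get_node_info(). A stripped-down sketch of that fast path, assuming the usual f2fs.h context (LOCKED_PAGE, struct page); demo_submit_read() is a hypothetical stand-in for the bio submission:

int demo_submit_read(struct page *page);	/* hypothetical */

static int demo_read_page(struct page *page)
{
	if (PageUptodate(page))
		return LOCKED_PAGE;	/* caller still holds the locked page */

	/* ... resolve the on-disk block address here (may block) ... */
	return demo_submit_read(page);
}
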
@@ -1150,16 +1148,21 @@ repeat:
 
        lock_page(page);
 
-       if (unlikely(!PageUptodate(page))) {
-               f2fs_put_page(page, 1);
-               return ERR_PTR(-EIO);
-       }
        if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
                f2fs_put_page(page, 1);
                goto repeat;
        }
+
+       if (unlikely(!PageUptodate(page)))
+               goto out_err;
 page_hit:
-       f2fs_bug_on(sbi, nid != nid_of_node(page));
+       if (unlikely(nid != nid_of_node(page))) {
+               f2fs_bug_on(sbi, 1);
+               ClearPageUptodate(page);
+out_err:
+               f2fs_put_page(page, 1);
+               return ERR_PTR(-EIO);
+       }
        return page;
 }
 
@@ -1176,24 +1179,6 @@ struct page *get_node_page_ra(struct page *parent, int start)
        return __get_node_page(sbi, nid, parent, start);
 }
 
-void sync_inode_page(struct dnode_of_data *dn)
-{
-       int ret = 0;
-
-       if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
-               ret = update_inode(dn->inode, dn->node_page);
-       } else if (dn->inode_page) {
-               if (!dn->inode_page_locked)
-                       lock_page(dn->inode_page);
-               ret = update_inode(dn->inode, dn->inode_page);
-               if (!dn->inode_page_locked)
-                       unlock_page(dn->inode_page);
-       } else {
-               ret = update_inode_page(dn->inode);
-       }
-       dn->node_changed = ret ? true: false;
-}
-
 static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
 {
        struct inode *inode;
@@ -1319,7 +1304,7 @@ continue_unlock:
        return last_page;
 }
 
-int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
+int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
                        struct writeback_control *wbc, bool atomic)
 {
        pgoff_t index, end;
@@ -1327,6 +1312,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
        int ret = 0;
        struct page *last_page = NULL;
        bool marked = false;
+       nid_t ino = inode->i_ino;
 
        if (atomic) {
                last_page = last_fsync_dnode(sbi, ino);
@@ -1380,9 +1366,13 @@ continue_unlock:
 
                        if (!atomic || page == last_page) {
                                set_fsync_mark(page, 1);
-                               if (IS_INODE(page))
+                               if (IS_INODE(page)) {
+                                       if (is_inode_flag_set(inode,
+                                                               FI_DIRTY_INODE))
+                                               update_inode(inode, page);
                                        set_dentry_mark(page,
                                                need_dentry_mark(sbi, ino));
+                               }
                                /*  may be written by other thread */
                                if (!PageDirty(page))
                                        set_page_dirty(page);
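
fsync_node_pages() now takes the whole inode so that, when the page being fsync-marked is the inode block itself and the in-core inode still carries unwritten changes (FI_DIRTY_INODE), those changes are folded into the page before it is written. A hedged sketch of that idea, assuming the usual f2fs.h helpers; demo_copy_inode_to_page() is a hypothetical stand-in for update_inode():

void demo_copy_inode_to_page(struct inode *inode, struct page *page);

static void demo_mark_for_fsync(struct inode *inode, struct page *node_page)
{
	/* fold pending in-core inode changes into the inode block first */
	if (IS_INODE(node_page) && is_inode_flag_set(inode, FI_DIRTY_INODE))
		demo_copy_inode_to_page(inode, node_page);

	set_fsync_mark(node_page, 1);
	if (!PageDirty(node_page))
		set_page_dirty(node_page);
}
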
@@ -1630,6 +1620,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
                            struct writeback_control *wbc)
 {
        struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+       struct blk_plug plug;
        long diff;
 
        /* balancing f2fs's metadata in background */
@@ -1643,7 +1634,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
 
        diff = nr_pages_to_write(sbi, NODE, wbc);
        wbc->sync_mode = WB_SYNC_NONE;
+       blk_start_plug(&plug);
        sync_node_pages(sbi, wbc);
+       blk_finish_plug(&plug);
        wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
        return 0;
 
@@ -1657,9 +1650,10 @@ static int f2fs_set_node_page_dirty(struct page *page)
 {
        trace_f2fs_set_page_dirty(page, NODE);
 
-       SetPageUptodate(page);
+       if (!PageUptodate(page))
+               SetPageUptodate(page);
        if (!PageDirty(page)) {
-               __set_page_dirty_nobuffers(page);
+               f2fs_set_page_dirty_nobuffers(page);
                inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
                SetPagePrivate(page);
                f2fs_trace_pid(page);
@@ -1778,7 +1772,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
        }
 }
 
-static void build_free_nids(struct f2fs_sb_info *sbi)
+void build_free_nids(struct f2fs_sb_info *sbi)
 {
        struct f2fs_nm_info *nm_i = NM_I(sbi);
        struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1787,14 +1781,14 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
        nid_t nid = nm_i->next_scan_nid;
 
        /* Enough entries */
-       if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
+       if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK)
                return;
 
        /* readahead nat pages to be scanned */
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
                                                        META_NAT, true);
 
-       down_read(&nm_i->nat_tree_lock);
+       percpu_down_read(&nm_i->nat_tree_lock);
 
        while (1) {
                struct page *page = get_current_nat_page(sbi, nid);
@@ -1826,7 +1820,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
                        remove_free_nid(nm_i, nid);
        }
        up_read(&curseg->journal_rwsem);
-       up_read(&nm_i->nat_tree_lock);
+       percpu_up_read(&nm_i->nat_tree_lock);
 
        ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
                                        nm_i->ra_nid_pages, META_NAT, false);
@@ -1925,12 +1919,15 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
        struct free_nid *i, *next;
        int nr = nr_shrink;
 
+       if (nm_i->fcnt <= MAX_FREE_NIDS)
+               return 0;
+
        if (!mutex_trylock(&nm_i->build_lock))
                return 0;
 
        spin_lock(&nm_i->free_nid_list_lock);
        list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
-               if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK)
+               if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS)
                        break;
                if (i->state == NID_ALLOC)
                        continue;
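
This version of try_to_free_nids() refuses to shrink at all unless the free-nid pool exceeds MAX_FREE_NIDS, and the inner loop stops at the same floor. The same "trim only above a floor" shape in isolation (locking omitted), with illustrative names (DEMO_FLOOR, struct demo_item) rather than the f2fs ones:

#include <linux/list.h>
#include <linux/slab.h>

#define DEMO_FLOOR	128	/* hypothetical, plays the role of MAX_FREE_NIDS */

struct demo_item {
	struct list_head list;
};

/* Trim at most nr_shrink items, but never drop the pool below its floor. */
static int demo_shrink_pool(struct list_head *pool, int *nr_in_pool,
			    int nr_shrink)
{
	struct demo_item *it, *next;
	int nr = nr_shrink;

	if (*nr_in_pool <= DEMO_FLOOR)
		return 0;

	list_for_each_entry_safe(it, next, pool, list) {
		if (nr_shrink <= 0 || *nr_in_pool <= DEMO_FLOOR)
			break;
		list_del(&it->list);
		kfree(it);
		(*nr_in_pool)--;
		nr_shrink--;
	}
	return nr - nr_shrink;	/* number of items actually freed */
}
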
@@ -1957,7 +1954,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
 
        ri = F2FS_INODE(page);
        if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
-               clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
+               clear_inode_flag(inode, FI_INLINE_XATTR);
                goto update_inode;
        }
 
@@ -1999,13 +1996,11 @@ recover_xnid:
        get_node_info(sbi, new_xnid, &ni);
        ni.ino = inode->i_ino;
        set_node_addr(sbi, &ni, NEW_ADDR, false);
-       F2FS_I(inode)->i_xattr_nid = new_xnid;
+       f2fs_i_xnid_write(inode, new_xnid);
 
        /* 3: update xattr blkaddr */
        refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
        set_node_addr(sbi, &ni, blkaddr, false);
-
-       update_inode_page(inode);
 }
 
 int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -2027,7 +2022,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
        /* Should not use this inode from free nid list */
        remove_free_nid(NM_I(sbi), ino);
 
-       SetPageUptodate(ipage);
+       if (!PageUptodate(ipage))
+               SetPageUptodate(ipage);
        fill_node_footer(ipage, ino, ino, 0, true);
 
        src = F2FS_INODE(page);
@@ -2213,7 +2209,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
        if (!nm_i->dirty_nat_cnt)
                return;
 
-       down_write(&nm_i->nat_tree_lock);
+       percpu_down_write(&nm_i->nat_tree_lock);
 
        /*
         * if there are no enough space in journal to store dirty nat
@@ -2236,7 +2232,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
        list_for_each_entry_safe(set, tmp, &sets, set_list)
                __flush_nat_entry_set(sbi, set);
 
-       up_write(&nm_i->nat_tree_lock);
+       percpu_up_write(&nm_i->nat_tree_lock);
 
        f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
 }
@@ -2272,7 +2268,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 
        mutex_init(&nm_i->build_lock);
        spin_lock_init(&nm_i->free_nid_list_lock);
-       init_rwsem(&nm_i->nat_tree_lock);
+       if (percpu_init_rwsem(&nm_i->nat_tree_lock))
+               return -ENOMEM;
 
        nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
        nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2329,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
        spin_unlock(&nm_i->free_nid_list_lock);
 
        /* destroy nat cache */
-       down_write(&nm_i->nat_tree_lock);
+       percpu_down_write(&nm_i->nat_tree_lock);
        while ((found = __gang_lookup_nat_cache(nm_i,
                                        nid, NATVEC_SIZE, natvec))) {
                unsigned idx;
@@ -2354,8 +2351,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
                        kmem_cache_free(nat_entry_set_slab, setvec[idx]);
                }
        }
-       up_write(&nm_i->nat_tree_lock);
+       percpu_up_write(&nm_i->nat_tree_lock);
 
+       percpu_free_rwsem(&nm_i->nat_tree_lock);
        kfree(nm_i->nat_bitmap);
        sbi->nm_info = NULL;
        kfree(nm_i);