ASoC: ssm4567: Reset device before regcache_sync()
[cascardo/linux.git] / fs / btrfs / delayed-inode.c
1 /*
2  * Copyright (C) 2011 Fujitsu.  All rights reserved.
3  * Written by Miao Xie <miaox@cn.fujitsu.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 021110-1307, USA.
18  */
19
20 #include <linux/slab.h>
21 #include "delayed-inode.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24 #include "ctree.h"
25
26 #define BTRFS_DELAYED_WRITEBACK         512
27 #define BTRFS_DELAYED_BACKGROUND        128
28 #define BTRFS_DELAYED_BATCH             16
29
30 static struct kmem_cache *delayed_node_cache;
31
32 int __init btrfs_delayed_inode_init(void)
33 {
34         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
35                                         sizeof(struct btrfs_delayed_node),
36                                         0,
37                                         SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
38                                         NULL);
39         if (!delayed_node_cache)
40                 return -ENOMEM;
41         return 0;
42 }
43
44 void btrfs_delayed_inode_exit(void)
45 {
46         if (delayed_node_cache)
47                 kmem_cache_destroy(delayed_node_cache);
48 }
49
50 static inline void btrfs_init_delayed_node(
51                                 struct btrfs_delayed_node *delayed_node,
52                                 struct btrfs_root *root, u64 inode_id)
53 {
54         delayed_node->root = root;
55         delayed_node->inode_id = inode_id;
56         atomic_set(&delayed_node->refs, 0);
57         delayed_node->ins_root = RB_ROOT;
58         delayed_node->del_root = RB_ROOT;
59         mutex_init(&delayed_node->mutex);
60         INIT_LIST_HEAD(&delayed_node->n_list);
61         INIT_LIST_HEAD(&delayed_node->p_list);
62 }
63
64 static inline int btrfs_is_continuous_delayed_item(
65                                         struct btrfs_delayed_item *item1,
66                                         struct btrfs_delayed_item *item2)
67 {
68         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
69             item1->key.objectid == item2->key.objectid &&
70             item1->key.type == item2->key.type &&
71             item1->key.offset + 1 == item2->key.offset)
72                 return 1;
73         return 0;
74 }
75
76 static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
77                                                         struct btrfs_root *root)
78 {
79         return root->fs_info->delayed_root;
80 }
81
82 static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
83 {
84         struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
85         struct btrfs_root *root = btrfs_inode->root;
86         u64 ino = btrfs_ino(inode);
87         struct btrfs_delayed_node *node;
88
89         node = ACCESS_ONCE(btrfs_inode->delayed_node);
90         if (node) {
91                 atomic_inc(&node->refs);
92                 return node;
93         }
94
95         spin_lock(&root->inode_lock);
96         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
97         if (node) {
98                 if (btrfs_inode->delayed_node) {
99                         atomic_inc(&node->refs);        /* can be accessed */
100                         BUG_ON(btrfs_inode->delayed_node != node);
101                         spin_unlock(&root->inode_lock);
102                         return node;
103                 }
104                 btrfs_inode->delayed_node = node;
105                 /* can be accessed and cached in the inode */
106                 atomic_add(2, &node->refs);
107                 spin_unlock(&root->inode_lock);
108                 return node;
109         }
110         spin_unlock(&root->inode_lock);
111
112         return NULL;
113 }
114
115 /* Will return either the node or PTR_ERR(-ENOMEM) */
116 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
117                                                         struct inode *inode)
118 {
119         struct btrfs_delayed_node *node;
120         struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
121         struct btrfs_root *root = btrfs_inode->root;
122         u64 ino = btrfs_ino(inode);
123         int ret;
124
125 again:
126         node = btrfs_get_delayed_node(inode);
127         if (node)
128                 return node;
129
130         node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
131         if (!node)
132                 return ERR_PTR(-ENOMEM);
133         btrfs_init_delayed_node(node, root, ino);
134
135         /* cached in the btrfs inode and can be accessed */
136         atomic_add(2, &node->refs);
137
138         ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
139         if (ret) {
140                 kmem_cache_free(delayed_node_cache, node);
141                 return ERR_PTR(ret);
142         }
143
144         spin_lock(&root->inode_lock);
145         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
146         if (ret == -EEXIST) {
147                 spin_unlock(&root->inode_lock);
148                 kmem_cache_free(delayed_node_cache, node);
149                 radix_tree_preload_end();
150                 goto again;
151         }
152         btrfs_inode->delayed_node = node;
153         spin_unlock(&root->inode_lock);
154         radix_tree_preload_end();
155
156         return node;
157 }
158
159 /*
160  * Call it when holding delayed_node->mutex
161  *
162  * If mod = 1, add this node into the prepared list.
163  */
164 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
165                                      struct btrfs_delayed_node *node,
166                                      int mod)
167 {
168         spin_lock(&root->lock);
169         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
170                 if (!list_empty(&node->p_list))
171                         list_move_tail(&node->p_list, &root->prepare_list);
172                 else if (mod)
173                         list_add_tail(&node->p_list, &root->prepare_list);
174         } else {
175                 list_add_tail(&node->n_list, &root->node_list);
176                 list_add_tail(&node->p_list, &root->prepare_list);
177                 atomic_inc(&node->refs);        /* inserted into list */
178                 root->nodes++;
179                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
180         }
181         spin_unlock(&root->lock);
182 }
183
184 /* Call it when holding delayed_node->mutex */
185 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
186                                        struct btrfs_delayed_node *node)
187 {
188         spin_lock(&root->lock);
189         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
190                 root->nodes--;
191                 atomic_dec(&node->refs);        /* not in the list */
192                 list_del_init(&node->n_list);
193                 if (!list_empty(&node->p_list))
194                         list_del_init(&node->p_list);
195                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
196         }
197         spin_unlock(&root->lock);
198 }
199
200 static struct btrfs_delayed_node *btrfs_first_delayed_node(
201                         struct btrfs_delayed_root *delayed_root)
202 {
203         struct list_head *p;
204         struct btrfs_delayed_node *node = NULL;
205
206         spin_lock(&delayed_root->lock);
207         if (list_empty(&delayed_root->node_list))
208                 goto out;
209
210         p = delayed_root->node_list.next;
211         node = list_entry(p, struct btrfs_delayed_node, n_list);
212         atomic_inc(&node->refs);
213 out:
214         spin_unlock(&delayed_root->lock);
215
216         return node;
217 }
218
219 static struct btrfs_delayed_node *btrfs_next_delayed_node(
220                                                 struct btrfs_delayed_node *node)
221 {
222         struct btrfs_delayed_root *delayed_root;
223         struct list_head *p;
224         struct btrfs_delayed_node *next = NULL;
225
226         delayed_root = node->root->fs_info->delayed_root;
227         spin_lock(&delayed_root->lock);
228         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
229                 /* not in the list */
230                 if (list_empty(&delayed_root->node_list))
231                         goto out;
232                 p = delayed_root->node_list.next;
233         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
234                 goto out;
235         else
236                 p = node->n_list.next;
237
238         next = list_entry(p, struct btrfs_delayed_node, n_list);
239         atomic_inc(&next->refs);
240 out:
241         spin_unlock(&delayed_root->lock);
242
243         return next;
244 }
245
246 static void __btrfs_release_delayed_node(
247                                 struct btrfs_delayed_node *delayed_node,
248                                 int mod)
249 {
250         struct btrfs_delayed_root *delayed_root;
251
252         if (!delayed_node)
253                 return;
254
255         delayed_root = delayed_node->root->fs_info->delayed_root;
256
257         mutex_lock(&delayed_node->mutex);
258         if (delayed_node->count)
259                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
260         else
261                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
262         mutex_unlock(&delayed_node->mutex);
263
264         if (atomic_dec_and_test(&delayed_node->refs)) {
265                 bool free = false;
266                 struct btrfs_root *root = delayed_node->root;
267                 spin_lock(&root->inode_lock);
268                 if (atomic_read(&delayed_node->refs) == 0) {
269                         radix_tree_delete(&root->delayed_nodes_tree,
270                                           delayed_node->inode_id);
271                         free = true;
272                 }
273                 spin_unlock(&root->inode_lock);
274                 if (free)
275                         kmem_cache_free(delayed_node_cache, delayed_node);
276         }
277 }
278
279 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
280 {
281         __btrfs_release_delayed_node(node, 0);
282 }
283
284 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
285                                         struct btrfs_delayed_root *delayed_root)
286 {
287         struct list_head *p;
288         struct btrfs_delayed_node *node = NULL;
289
290         spin_lock(&delayed_root->lock);
291         if (list_empty(&delayed_root->prepare_list))
292                 goto out;
293
294         p = delayed_root->prepare_list.next;
295         list_del_init(p);
296         node = list_entry(p, struct btrfs_delayed_node, p_list);
297         atomic_inc(&node->refs);
298 out:
299         spin_unlock(&delayed_root->lock);
300
301         return node;
302 }
303
304 static inline void btrfs_release_prepared_delayed_node(
305                                         struct btrfs_delayed_node *node)
306 {
307         __btrfs_release_delayed_node(node, 1);
308 }
309
310 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
311 {
312         struct btrfs_delayed_item *item;
313         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
314         if (item) {
315                 item->data_len = data_len;
316                 item->ins_or_del = 0;
317                 item->bytes_reserved = 0;
318                 item->delayed_node = NULL;
319                 atomic_set(&item->refs, 1);
320         }
321         return item;
322 }
323
324 /*
325  * __btrfs_lookup_delayed_item - look up the delayed item by key
326  * @delayed_node: pointer to the delayed node
327  * @key:          the key to look up
328  * @prev:         used to store the prev item if the right item isn't found
329  * @next:         used to store the next item if the right item isn't found
330  *
331  * Note: if we don't find the right item, we will return the prev item and
332  * the next item.
333  */
334 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
335                                 struct rb_root *root,
336                                 struct btrfs_key *key,
337                                 struct btrfs_delayed_item **prev,
338                                 struct btrfs_delayed_item **next)
339 {
340         struct rb_node *node, *prev_node = NULL;
341         struct btrfs_delayed_item *delayed_item = NULL;
342         int ret = 0;
343
344         node = root->rb_node;
345
346         while (node) {
347                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
348                                         rb_node);
349                 prev_node = node;
350                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
351                 if (ret < 0)
352                         node = node->rb_right;
353                 else if (ret > 0)
354                         node = node->rb_left;
355                 else
356                         return delayed_item;
357         }
358
359         if (prev) {
360                 if (!prev_node)
361                         *prev = NULL;
362                 else if (ret < 0)
363                         *prev = delayed_item;
364                 else if ((node = rb_prev(prev_node)) != NULL) {
365                         *prev = rb_entry(node, struct btrfs_delayed_item,
366                                          rb_node);
367                 } else
368                         *prev = NULL;
369         }
370
371         if (next) {
372                 if (!prev_node)
373                         *next = NULL;
374                 else if (ret > 0)
375                         *next = delayed_item;
376                 else if ((node = rb_next(prev_node)) != NULL) {
377                         *next = rb_entry(node, struct btrfs_delayed_item,
378                                          rb_node);
379                 } else
380                         *next = NULL;
381         }
382         return NULL;
383 }
384
385 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
386                                         struct btrfs_delayed_node *delayed_node,
387                                         struct btrfs_key *key)
388 {
389         struct btrfs_delayed_item *item;
390
391         item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
392                                            NULL, NULL);
393         return item;
394 }
395
396 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
397                                     struct btrfs_delayed_item *ins,
398                                     int action)
399 {
400         struct rb_node **p, *node;
401         struct rb_node *parent_node = NULL;
402         struct rb_root *root;
403         struct btrfs_delayed_item *item;
404         int cmp;
405
406         if (action == BTRFS_DELAYED_INSERTION_ITEM)
407                 root = &delayed_node->ins_root;
408         else if (action == BTRFS_DELAYED_DELETION_ITEM)
409                 root = &delayed_node->del_root;
410         else
411                 BUG();
412         p = &root->rb_node;
413         node = &ins->rb_node;
414
415         while (*p) {
416                 parent_node = *p;
417                 item = rb_entry(parent_node, struct btrfs_delayed_item,
418                                  rb_node);
419
420                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
421                 if (cmp < 0)
422                         p = &(*p)->rb_right;
423                 else if (cmp > 0)
424                         p = &(*p)->rb_left;
425                 else
426                         return -EEXIST;
427         }
428
429         rb_link_node(node, parent_node, p);
430         rb_insert_color(node, root);
431         ins->delayed_node = delayed_node;
432         ins->ins_or_del = action;
433
434         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
435             action == BTRFS_DELAYED_INSERTION_ITEM &&
436             ins->key.offset >= delayed_node->index_cnt)
437                         delayed_node->index_cnt = ins->key.offset + 1;
438
439         delayed_node->count++;
440         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
441         return 0;
442 }
443
444 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
445                                               struct btrfs_delayed_item *item)
446 {
447         return __btrfs_add_delayed_item(node, item,
448                                         BTRFS_DELAYED_INSERTION_ITEM);
449 }
450
451 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
452                                              struct btrfs_delayed_item *item)
453 {
454         return __btrfs_add_delayed_item(node, item,
455                                         BTRFS_DELAYED_DELETION_ITEM);
456 }
457
458 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
459 {
460         int seq = atomic_inc_return(&delayed_root->items_seq);
461
462         /*
463          * atomic_dec_return implies a barrier for waitqueue_active
464          */
465         if ((atomic_dec_return(&delayed_root->items) <
466             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
467             waitqueue_active(&delayed_root->wait))
468                 wake_up(&delayed_root->wait);
469 }
470
471 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
472 {
473         struct rb_root *root;
474         struct btrfs_delayed_root *delayed_root;
475
476         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
477
478         BUG_ON(!delayed_root);
479         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
480                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
481
482         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
483                 root = &delayed_item->delayed_node->ins_root;
484         else
485                 root = &delayed_item->delayed_node->del_root;
486
487         rb_erase(&delayed_item->rb_node, root);
488         delayed_item->delayed_node->count--;
489
490         finish_one_item(delayed_root);
491 }
492
493 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
494 {
495         if (item) {
496                 __btrfs_remove_delayed_item(item);
497                 if (atomic_dec_and_test(&item->refs))
498                         kfree(item);
499         }
500 }
501
502 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
503                                         struct btrfs_delayed_node *delayed_node)
504 {
505         struct rb_node *p;
506         struct btrfs_delayed_item *item = NULL;
507
508         p = rb_first(&delayed_node->ins_root);
509         if (p)
510                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
511
512         return item;
513 }
514
515 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
516                                         struct btrfs_delayed_node *delayed_node)
517 {
518         struct rb_node *p;
519         struct btrfs_delayed_item *item = NULL;
520
521         p = rb_first(&delayed_node->del_root);
522         if (p)
523                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
524
525         return item;
526 }
527
528 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
529                                                 struct btrfs_delayed_item *item)
530 {
531         struct rb_node *p;
532         struct btrfs_delayed_item *next = NULL;
533
534         p = rb_next(&item->rb_node);
535         if (p)
536                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
537
538         return next;
539 }
540
541 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
542                                                struct btrfs_root *root,
543                                                struct btrfs_delayed_item *item)
544 {
545         struct btrfs_block_rsv *src_rsv;
546         struct btrfs_block_rsv *dst_rsv;
547         u64 num_bytes;
548         int ret;
549
550         if (!trans->bytes_reserved)
551                 return 0;
552
553         src_rsv = trans->block_rsv;
554         dst_rsv = &root->fs_info->delayed_block_rsv;
555
556         num_bytes = btrfs_calc_trans_metadata_size(root, 1);
557         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
558         if (!ret) {
559                 trace_btrfs_space_reservation(root->fs_info, "delayed_item",
560                                               item->key.objectid,
561                                               num_bytes, 1);
562                 item->bytes_reserved = num_bytes;
563         }
564
565         return ret;
566 }
567
568 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
569                                                 struct btrfs_delayed_item *item)
570 {
571         struct btrfs_block_rsv *rsv;
572
573         if (!item->bytes_reserved)
574                 return;
575
576         rsv = &root->fs_info->delayed_block_rsv;
577         trace_btrfs_space_reservation(root->fs_info, "delayed_item",
578                                       item->key.objectid, item->bytes_reserved,
579                                       0);
580         btrfs_block_rsv_release(root, rsv,
581                                 item->bytes_reserved);
582 }
583
584 static int btrfs_delayed_inode_reserve_metadata(
585                                         struct btrfs_trans_handle *trans,
586                                         struct btrfs_root *root,
587                                         struct inode *inode,
588                                         struct btrfs_delayed_node *node)
589 {
590         struct btrfs_block_rsv *src_rsv;
591         struct btrfs_block_rsv *dst_rsv;
592         u64 num_bytes;
593         int ret;
594         bool release = false;
595
596         src_rsv = trans->block_rsv;
597         dst_rsv = &root->fs_info->delayed_block_rsv;
598
599         num_bytes = btrfs_calc_trans_metadata_size(root, 1);
600
601         /*
602          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
603          * which doesn't reserve space for speed.  This is a problem since we
604          * still need to reserve space for this update, so try to reserve the
605          * space.
606          *
607          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
608          * we're accounted for.
609          */
610         if (!src_rsv || (!trans->bytes_reserved &&
611                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
612                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
613                                           BTRFS_RESERVE_NO_FLUSH);
614                 /*
615                  * Since we're under a transaction reserve_metadata_bytes could
616                  * try to commit the transaction which will make it return
617                  * EAGAIN to make us stop the transaction we have, so return
618                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
619                  */
620                 if (ret == -EAGAIN)
621                         ret = -ENOSPC;
622                 if (!ret) {
623                         node->bytes_reserved = num_bytes;
624                         trace_btrfs_space_reservation(root->fs_info,
625                                                       "delayed_inode",
626                                                       btrfs_ino(inode),
627                                                       num_bytes, 1);
628                 }
629                 return ret;
630         } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
631                 spin_lock(&BTRFS_I(inode)->lock);
632                 if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
633                                        &BTRFS_I(inode)->runtime_flags)) {
634                         spin_unlock(&BTRFS_I(inode)->lock);
635                         release = true;
636                         goto migrate;
637                 }
638                 spin_unlock(&BTRFS_I(inode)->lock);
639
640                 /* Ok we didn't have space pre-reserved.  This shouldn't happen
641                  * too often but it can happen if we do delalloc to an existing
642                  * inode which gets dirtied because of the time update, and then
643                  * isn't touched again until after the transaction commits and
644                  * then we try to write out the data.  First try to be nice and
645                  * reserve something strictly for us.  If not be a pain and try
646                  * to steal from the delalloc block rsv.
647                  */
648                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
649                                           BTRFS_RESERVE_NO_FLUSH);
650                 if (!ret)
651                         goto out;
652
653                 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
654                 if (!WARN_ON(ret))
655                         goto out;
656
657                 /*
658                  * Ok this is a problem, let's just steal from the global rsv
659                  * since this really shouldn't happen that often.
660                  */
661                 ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
662                                               dst_rsv, num_bytes);
663                 goto out;
664         }
665
666 migrate:
667         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
668
669 out:
670         /*
671          * Migrate only takes a reservation, it doesn't touch the size of the
672          * block_rsv.  This is to simplify people who don't normally have things
673          * migrated from their block rsv.  If they go to release their
674          * reservation, that will decrease the size as well, so if migrate
675          * reduced size we'd end up with a negative size.  But for the
676          * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
677          * but we could in fact do this reserve/migrate dance several times
678          * between the time we did the original reservation and we'd clean it
679          * up.  So to take care of this, release the space for the meta
680          * reservation here.  I think it may be time for a documentation page on
681          * how block rsvs. work.
682          */
683         if (!ret) {
684                 trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
685                                               btrfs_ino(inode), num_bytes, 1);
686                 node->bytes_reserved = num_bytes;
687         }
688
689         if (release) {
690                 trace_btrfs_space_reservation(root->fs_info, "delalloc",
691                                               btrfs_ino(inode), num_bytes, 0);
692                 btrfs_block_rsv_release(root, src_rsv, num_bytes);
693         }
694
695         return ret;
696 }
697
698 static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
699                                                 struct btrfs_delayed_node *node)
700 {
701         struct btrfs_block_rsv *rsv;
702
703         if (!node->bytes_reserved)
704                 return;
705
706         rsv = &root->fs_info->delayed_block_rsv;
707         trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
708                                       node->inode_id, node->bytes_reserved, 0);
709         btrfs_block_rsv_release(root, rsv,
710                                 node->bytes_reserved);
711         node->bytes_reserved = 0;
712 }
713
714 /*
715  * This helper will insert some continuous items into the same leaf according
716  * to the free space of the leaf.
717  */
718 static int btrfs_batch_insert_items(struct btrfs_root *root,
719                                     struct btrfs_path *path,
720                                     struct btrfs_delayed_item *item)
721 {
722         struct btrfs_delayed_item *curr, *next;
723         int free_space;
724         int total_data_size = 0, total_size = 0;
725         struct extent_buffer *leaf;
726         char *data_ptr;
727         struct btrfs_key *keys;
728         u32 *data_size;
729         struct list_head head;
730         int slot;
731         int nitems;
732         int i;
733         int ret = 0;
734
735         BUG_ON(!path->nodes[0]);
736
737         leaf = path->nodes[0];
738         free_space = btrfs_leaf_free_space(root, leaf);
739         INIT_LIST_HEAD(&head);
740
741         next = item;
742         nitems = 0;
743
744         /*
745          * count the number of the continuous items that we can insert in batch
746          */
747         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
748                free_space) {
749                 total_data_size += next->data_len;
750                 total_size += next->data_len + sizeof(struct btrfs_item);
751                 list_add_tail(&next->tree_list, &head);
752                 nitems++;
753
754                 curr = next;
755                 next = __btrfs_next_delayed_item(curr);
756                 if (!next)
757                         break;
758
759                 if (!btrfs_is_continuous_delayed_item(curr, next))
760                         break;
761         }
762
763         if (!nitems) {
764                 ret = 0;
765                 goto out;
766         }
767
768         /*
769          * we need allocate some memory space, but it might cause the task
770          * to sleep, so we set all locked nodes in the path to blocking locks
771          * first.
772          */
773         btrfs_set_path_blocking(path);
774
775         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
776         if (!keys) {
777                 ret = -ENOMEM;
778                 goto out;
779         }
780
781         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
782         if (!data_size) {
783                 ret = -ENOMEM;
784                 goto error;
785         }
786
787         /* get keys of all the delayed items */
788         i = 0;
789         list_for_each_entry(next, &head, tree_list) {
790                 keys[i] = next->key;
791                 data_size[i] = next->data_len;
792                 i++;
793         }
794
795         /* reset all the locked nodes in the patch to spinning locks. */
796         btrfs_clear_path_blocking(path, NULL, 0);
797
798         /* insert the keys of the items */
799         setup_items_for_insert(root, path, keys, data_size,
800                                total_data_size, total_size, nitems);
801
802         /* insert the dir index items */
803         slot = path->slots[0];
804         list_for_each_entry_safe(curr, next, &head, tree_list) {
805                 data_ptr = btrfs_item_ptr(leaf, slot, char);
806                 write_extent_buffer(leaf, &curr->data,
807                                     (unsigned long)data_ptr,
808                                     curr->data_len);
809                 slot++;
810
811                 btrfs_delayed_item_release_metadata(root, curr);
812
813                 list_del(&curr->tree_list);
814                 btrfs_release_delayed_item(curr);
815         }
816
817 error:
818         kfree(data_size);
819         kfree(keys);
820 out:
821         return ret;
822 }
823
824 /*
825  * This helper can just do simple insertion that needn't extend item for new
826  * data, such as directory name index insertion, inode insertion.
827  */
828 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
829                                      struct btrfs_root *root,
830                                      struct btrfs_path *path,
831                                      struct btrfs_delayed_item *delayed_item)
832 {
833         struct extent_buffer *leaf;
834         char *ptr;
835         int ret;
836
837         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
838                                       delayed_item->data_len);
839         if (ret < 0 && ret != -EEXIST)
840                 return ret;
841
842         leaf = path->nodes[0];
843
844         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
845
846         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
847                             delayed_item->data_len);
848         btrfs_mark_buffer_dirty(leaf);
849
850         btrfs_delayed_item_release_metadata(root, delayed_item);
851         return 0;
852 }
853
854 /*
855  * we insert an item first, then if there are some continuous items, we try
856  * to insert those items into the same leaf.
857  */
858 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
859                                       struct btrfs_path *path,
860                                       struct btrfs_root *root,
861                                       struct btrfs_delayed_node *node)
862 {
863         struct btrfs_delayed_item *curr, *prev;
864         int ret = 0;
865
866 do_again:
867         mutex_lock(&node->mutex);
868         curr = __btrfs_first_delayed_insertion_item(node);
869         if (!curr)
870                 goto insert_end;
871
872         ret = btrfs_insert_delayed_item(trans, root, path, curr);
873         if (ret < 0) {
874                 btrfs_release_path(path);
875                 goto insert_end;
876         }
877
878         prev = curr;
879         curr = __btrfs_next_delayed_item(prev);
880         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
881                 /* insert the continuous items into the same leaf */
882                 path->slots[0]++;
883                 btrfs_batch_insert_items(root, path, curr);
884         }
885         btrfs_release_delayed_item(prev);
886         btrfs_mark_buffer_dirty(path->nodes[0]);
887
888         btrfs_release_path(path);
889         mutex_unlock(&node->mutex);
890         goto do_again;
891
892 insert_end:
893         mutex_unlock(&node->mutex);
894         return ret;
895 }
896
897 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
898                                     struct btrfs_root *root,
899                                     struct btrfs_path *path,
900                                     struct btrfs_delayed_item *item)
901 {
902         struct btrfs_delayed_item *curr, *next;
903         struct extent_buffer *leaf;
904         struct btrfs_key key;
905         struct list_head head;
906         int nitems, i, last_item;
907         int ret = 0;
908
909         BUG_ON(!path->nodes[0]);
910
911         leaf = path->nodes[0];
912
913         i = path->slots[0];
914         last_item = btrfs_header_nritems(leaf) - 1;
915         if (i > last_item)
916                 return -ENOENT; /* FIXME: Is errno suitable? */
917
918         next = item;
919         INIT_LIST_HEAD(&head);
920         btrfs_item_key_to_cpu(leaf, &key, i);
921         nitems = 0;
922         /*
923          * count the number of the dir index items that we can delete in batch
924          */
925         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
926                 list_add_tail(&next->tree_list, &head);
927                 nitems++;
928
929                 curr = next;
930                 next = __btrfs_next_delayed_item(curr);
931                 if (!next)
932                         break;
933
934                 if (!btrfs_is_continuous_delayed_item(curr, next))
935                         break;
936
937                 i++;
938                 if (i > last_item)
939                         break;
940                 btrfs_item_key_to_cpu(leaf, &key, i);
941         }
942
943         if (!nitems)
944                 return 0;
945
946         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
947         if (ret)
948                 goto out;
949
950         list_for_each_entry_safe(curr, next, &head, tree_list) {
951                 btrfs_delayed_item_release_metadata(root, curr);
952                 list_del(&curr->tree_list);
953                 btrfs_release_delayed_item(curr);
954         }
955
956 out:
957         return ret;
958 }
959
960 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
961                                       struct btrfs_path *path,
962                                       struct btrfs_root *root,
963                                       struct btrfs_delayed_node *node)
964 {
965         struct btrfs_delayed_item *curr, *prev;
966         int ret = 0;
967
968 do_again:
969         mutex_lock(&node->mutex);
970         curr = __btrfs_first_delayed_deletion_item(node);
971         if (!curr)
972                 goto delete_fail;
973
974         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
975         if (ret < 0)
976                 goto delete_fail;
977         else if (ret > 0) {
978                 /*
979                  * can't find the item which the node points to, so this node
980                  * is invalid, just drop it.
981                  */
982                 prev = curr;
983                 curr = __btrfs_next_delayed_item(prev);
984                 btrfs_release_delayed_item(prev);
985                 ret = 0;
986                 btrfs_release_path(path);
987                 if (curr) {
988                         mutex_unlock(&node->mutex);
989                         goto do_again;
990                 } else
991                         goto delete_fail;
992         }
993
994         btrfs_batch_delete_items(trans, root, path, curr);
995         btrfs_release_path(path);
996         mutex_unlock(&node->mutex);
997         goto do_again;
998
999 delete_fail:
1000         btrfs_release_path(path);
1001         mutex_unlock(&node->mutex);
1002         return ret;
1003 }
1004
1005 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1006 {
1007         struct btrfs_delayed_root *delayed_root;
1008
1009         if (delayed_node &&
1010             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1011                 BUG_ON(!delayed_node->root);
1012                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1013                 delayed_node->count--;
1014
1015                 delayed_root = delayed_node->root->fs_info->delayed_root;
1016                 finish_one_item(delayed_root);
1017         }
1018 }
1019
1020 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
1021 {
1022         struct btrfs_delayed_root *delayed_root;
1023
1024         ASSERT(delayed_node->root);
1025         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1026         delayed_node->count--;
1027
1028         delayed_root = delayed_node->root->fs_info->delayed_root;
1029         finish_one_item(delayed_root);
1030 }
1031
1032 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1033                                         struct btrfs_root *root,
1034                                         struct btrfs_path *path,
1035                                         struct btrfs_delayed_node *node)
1036 {
1037         struct btrfs_key key;
1038         struct btrfs_inode_item *inode_item;
1039         struct extent_buffer *leaf;
1040         int mod;
1041         int ret;
1042
1043         key.objectid = node->inode_id;
1044         key.type = BTRFS_INODE_ITEM_KEY;
1045         key.offset = 0;
1046
1047         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1048                 mod = -1;
1049         else
1050                 mod = 1;
1051
1052         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1053         if (ret > 0) {
1054                 btrfs_release_path(path);
1055                 return -ENOENT;
1056         } else if (ret < 0) {
1057                 return ret;
1058         }
1059
1060         leaf = path->nodes[0];
1061         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1062                                     struct btrfs_inode_item);
1063         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1064                             sizeof(struct btrfs_inode_item));
1065         btrfs_mark_buffer_dirty(leaf);
1066
1067         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1068                 goto no_iref;
1069
1070         path->slots[0]++;
1071         if (path->slots[0] >= btrfs_header_nritems(leaf))
1072                 goto search;
1073 again:
1074         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1075         if (key.objectid != node->inode_id)
1076                 goto out;
1077
1078         if (key.type != BTRFS_INODE_REF_KEY &&
1079             key.type != BTRFS_INODE_EXTREF_KEY)
1080                 goto out;
1081
1082         /*
1083          * Delayed iref deletion is for the inode who has only one link,
1084          * so there is only one iref. The case that several irefs are
1085          * in the same item doesn't exist.
1086          */
1087         btrfs_del_item(trans, root, path);
1088 out:
1089         btrfs_release_delayed_iref(node);
1090 no_iref:
1091         btrfs_release_path(path);
1092 err_out:
1093         btrfs_delayed_inode_release_metadata(root, node);
1094         btrfs_release_delayed_inode(node);
1095
1096         return ret;
1097
1098 search:
1099         btrfs_release_path(path);
1100
1101         key.type = BTRFS_INODE_EXTREF_KEY;
1102         key.offset = -1;
1103         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1104         if (ret < 0)
1105                 goto err_out;
1106         ASSERT(ret);
1107
1108         ret = 0;
1109         leaf = path->nodes[0];
1110         path->slots[0]--;
1111         goto again;
1112 }
1113
1114 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1115                                              struct btrfs_root *root,
1116                                              struct btrfs_path *path,
1117                                              struct btrfs_delayed_node *node)
1118 {
1119         int ret;
1120
1121         mutex_lock(&node->mutex);
1122         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1123                 mutex_unlock(&node->mutex);
1124                 return 0;
1125         }
1126
1127         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1128         mutex_unlock(&node->mutex);
1129         return ret;
1130 }
1131
1132 static inline int
1133 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1134                                    struct btrfs_path *path,
1135                                    struct btrfs_delayed_node *node)
1136 {
1137         int ret;
1138
1139         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1140         if (ret)
1141                 return ret;
1142
1143         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1144         if (ret)
1145                 return ret;
1146
1147         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1148         return ret;
1149 }
1150
1151 /*
1152  * Called when committing the transaction.
1153  * Returns 0 on success.
1154  * Returns < 0 on error and returns with an aborted transaction with any
1155  * outstanding delayed items cleaned up.
1156  */
1157 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1158                                      struct btrfs_root *root, int nr)
1159 {
1160         struct btrfs_delayed_root *delayed_root;
1161         struct btrfs_delayed_node *curr_node, *prev_node;
1162         struct btrfs_path *path;
1163         struct btrfs_block_rsv *block_rsv;
1164         int ret = 0;
1165         bool count = (nr > 0);
1166
1167         if (trans->aborted)
1168                 return -EIO;
1169
1170         path = btrfs_alloc_path();
1171         if (!path)
1172                 return -ENOMEM;
1173         path->leave_spinning = 1;
1174
1175         block_rsv = trans->block_rsv;
1176         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1177
1178         delayed_root = btrfs_get_delayed_root(root);
1179
1180         curr_node = btrfs_first_delayed_node(delayed_root);
1181         while (curr_node && (!count || (count && nr--))) {
1182                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1183                                                          curr_node);
1184                 if (ret) {
1185                         btrfs_release_delayed_node(curr_node);
1186                         curr_node = NULL;
1187                         btrfs_abort_transaction(trans, root, ret);
1188                         break;
1189                 }
1190
1191                 prev_node = curr_node;
1192                 curr_node = btrfs_next_delayed_node(curr_node);
1193                 btrfs_release_delayed_node(prev_node);
1194         }
1195
1196         if (curr_node)
1197                 btrfs_release_delayed_node(curr_node);
1198         btrfs_free_path(path);
1199         trans->block_rsv = block_rsv;
1200
1201         return ret;
1202 }
1203
1204 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1205                             struct btrfs_root *root)
1206 {
1207         return __btrfs_run_delayed_items(trans, root, -1);
1208 }
1209
1210 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
1211                                struct btrfs_root *root, int nr)
1212 {
1213         return __btrfs_run_delayed_items(trans, root, nr);
1214 }
1215
1216 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1217                                      struct inode *inode)
1218 {
1219         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1220         struct btrfs_path *path;
1221         struct btrfs_block_rsv *block_rsv;
1222         int ret;
1223
1224         if (!delayed_node)
1225                 return 0;
1226
1227         mutex_lock(&delayed_node->mutex);
1228         if (!delayed_node->count) {
1229                 mutex_unlock(&delayed_node->mutex);
1230                 btrfs_release_delayed_node(delayed_node);
1231                 return 0;
1232         }
1233         mutex_unlock(&delayed_node->mutex);
1234
1235         path = btrfs_alloc_path();
1236         if (!path) {
1237                 btrfs_release_delayed_node(delayed_node);
1238                 return -ENOMEM;
1239         }
1240         path->leave_spinning = 1;
1241
1242         block_rsv = trans->block_rsv;
1243         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1244
1245         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1246
1247         btrfs_release_delayed_node(delayed_node);
1248         btrfs_free_path(path);
1249         trans->block_rsv = block_rsv;
1250
1251         return ret;
1252 }
1253
1254 int btrfs_commit_inode_delayed_inode(struct inode *inode)
1255 {
1256         struct btrfs_trans_handle *trans;
1257         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1258         struct btrfs_path *path;
1259         struct btrfs_block_rsv *block_rsv;
1260         int ret;
1261
1262         if (!delayed_node)
1263                 return 0;
1264
1265         mutex_lock(&delayed_node->mutex);
1266         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1267                 mutex_unlock(&delayed_node->mutex);
1268                 btrfs_release_delayed_node(delayed_node);
1269                 return 0;
1270         }
1271         mutex_unlock(&delayed_node->mutex);
1272
1273         trans = btrfs_join_transaction(delayed_node->root);
1274         if (IS_ERR(trans)) {
1275                 ret = PTR_ERR(trans);
1276                 goto out;
1277         }
1278
1279         path = btrfs_alloc_path();
1280         if (!path) {
1281                 ret = -ENOMEM;
1282                 goto trans_out;
1283         }
1284         path->leave_spinning = 1;
1285
1286         block_rsv = trans->block_rsv;
1287         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1288
1289         mutex_lock(&delayed_node->mutex);
1290         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1291                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1292                                                    path, delayed_node);
1293         else
1294                 ret = 0;
1295         mutex_unlock(&delayed_node->mutex);
1296
1297         btrfs_free_path(path);
1298         trans->block_rsv = block_rsv;
1299 trans_out:
1300         btrfs_end_transaction(trans, delayed_node->root);
1301         btrfs_btree_balance_dirty(delayed_node->root);
1302 out:
1303         btrfs_release_delayed_node(delayed_node);
1304
1305         return ret;
1306 }
1307
1308 void btrfs_remove_delayed_node(struct inode *inode)
1309 {
1310         struct btrfs_delayed_node *delayed_node;
1311
1312         delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1313         if (!delayed_node)
1314                 return;
1315
1316         BTRFS_I(inode)->delayed_node = NULL;
1317         btrfs_release_delayed_node(delayed_node);
1318 }
1319
1320 struct btrfs_async_delayed_work {
1321         struct btrfs_delayed_root *delayed_root;
1322         int nr;
1323         struct btrfs_work work;
1324 };
1325
1326 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1327 {
1328         struct btrfs_async_delayed_work *async_work;
1329         struct btrfs_delayed_root *delayed_root;
1330         struct btrfs_trans_handle *trans;
1331         struct btrfs_path *path;
1332         struct btrfs_delayed_node *delayed_node = NULL;
1333         struct btrfs_root *root;
1334         struct btrfs_block_rsv *block_rsv;
1335         int total_done = 0;
1336
1337         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1338         delayed_root = async_work->delayed_root;
1339
1340         path = btrfs_alloc_path();
1341         if (!path)
1342                 goto out;
1343
1344 again:
1345         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
1346                 goto free_path;
1347
1348         delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1349         if (!delayed_node)
1350                 goto free_path;
1351
1352         path->leave_spinning = 1;
1353         root = delayed_node->root;
1354
1355         trans = btrfs_join_transaction(root);
1356         if (IS_ERR(trans))
1357                 goto release_path;
1358
1359         block_rsv = trans->block_rsv;
1360         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1361
1362         __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1363
1364         trans->block_rsv = block_rsv;
1365         btrfs_end_transaction(trans, root);
1366         btrfs_btree_balance_dirty_nodelay(root);
1367
1368 release_path:
1369         btrfs_release_path(path);
1370         total_done++;
1371
1372         btrfs_release_prepared_delayed_node(delayed_node);
1373         if (async_work->nr == 0 || total_done < async_work->nr)
1374                 goto again;
1375
1376 free_path:
1377         btrfs_free_path(path);
1378 out:
1379         wake_up(&delayed_root->wait);
1380         kfree(async_work);
1381 }
1382
1383
1384 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1385                                      struct btrfs_fs_info *fs_info, int nr)
1386 {
1387         struct btrfs_async_delayed_work *async_work;
1388
1389         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1390                 return 0;
1391
1392         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1393         if (!async_work)
1394                 return -ENOMEM;
1395
1396         async_work->delayed_root = delayed_root;
1397         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1398                         btrfs_async_run_delayed_root, NULL, NULL);
1399         async_work->nr = nr;
1400
1401         btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
1402         return 0;
1403 }
1404
1405 void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1406 {
1407         struct btrfs_delayed_root *delayed_root;
1408         delayed_root = btrfs_get_delayed_root(root);
1409         WARN_ON(btrfs_first_delayed_node(delayed_root));
1410 }
1411
1412 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1413 {
1414         int val = atomic_read(&delayed_root->items_seq);
1415
1416         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1417                 return 1;
1418
1419         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1420                 return 1;
1421
1422         return 0;
1423 }
1424
1425 void btrfs_balance_delayed_items(struct btrfs_root *root)
1426 {
1427         struct btrfs_delayed_root *delayed_root;
1428         struct btrfs_fs_info *fs_info = root->fs_info;
1429
1430         delayed_root = btrfs_get_delayed_root(root);
1431
1432         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1433                 return;
1434
1435         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1436                 int seq;
1437                 int ret;
1438
1439                 seq = atomic_read(&delayed_root->items_seq);
1440
1441                 ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
1442                 if (ret)
1443                         return;
1444
1445                 wait_event_interruptible(delayed_root->wait,
1446                                          could_end_wait(delayed_root, seq));
1447                 return;
1448         }
1449
1450         btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
1451 }
1452
1453 /* Will return 0 or -ENOMEM */
1454 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1455                                    struct btrfs_root *root, const char *name,
1456                                    int name_len, struct inode *dir,
1457                                    struct btrfs_disk_key *disk_key, u8 type,
1458                                    u64 index)
1459 {
1460         struct btrfs_delayed_node *delayed_node;
1461         struct btrfs_delayed_item *delayed_item;
1462         struct btrfs_dir_item *dir_item;
1463         int ret;
1464
1465         delayed_node = btrfs_get_or_create_delayed_node(dir);
1466         if (IS_ERR(delayed_node))
1467                 return PTR_ERR(delayed_node);
1468
1469         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1470         if (!delayed_item) {
1471                 ret = -ENOMEM;
1472                 goto release_node;
1473         }
1474
1475         delayed_item->key.objectid = btrfs_ino(dir);
1476         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1477         delayed_item->key.offset = index;
1478
1479         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1480         dir_item->location = *disk_key;
1481         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1482         btrfs_set_stack_dir_data_len(dir_item, 0);
1483         btrfs_set_stack_dir_name_len(dir_item, name_len);
1484         btrfs_set_stack_dir_type(dir_item, type);
1485         memcpy((char *)(dir_item + 1), name, name_len);
1486
1487         ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1488         /*
1489          * we have reserved enough space when we start a new transaction,
1490          * so reserving metadata failure is impossible
1491          */
1492         BUG_ON(ret);
1493
1494
1495         mutex_lock(&delayed_node->mutex);
1496         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1497         if (unlikely(ret)) {
1498                 btrfs_err(root->fs_info, "error adding delayed dir index item (name: %.*s) "
1499                                 "into the insertion tree of the delayed node "
1500                                 "(root id: %llu, inode id: %llu, errno: %d)",
1501                                 name_len, name, delayed_node->root->objectid,
1502                                 delayed_node->inode_id, ret);
1503                 BUG();
1504         }
1505         mutex_unlock(&delayed_node->mutex);
1506
1507 release_node:
1508         btrfs_release_delayed_node(delayed_node);
1509         return ret;
1510 }
1511
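/*
 * Try to cancel a not-yet-committed delayed insertion that matches @key.
 * Returns 0 if a matching item was found and dropped, 1 if there was
 * nothing to cancel.
 */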
1512 static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1513                                                struct btrfs_delayed_node *node,
1514                                                struct btrfs_key *key)
1515 {
1516         struct btrfs_delayed_item *item;
1517
1518         mutex_lock(&node->mutex);
1519         item = __btrfs_lookup_delayed_insertion_item(node, key);
1520         if (!item) {
1521                 mutex_unlock(&node->mutex);
1522                 return 1;
1523         }
1524
1525         btrfs_delayed_item_release_metadata(root, item);
1526         btrfs_release_delayed_item(item);
1527         mutex_unlock(&node->mutex);
1528         return 0;
1529 }
1530
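/*
 * Delete a dir index item via the delayed-inode machinery.  If a matching
 * delayed insertion is still pending, cancel it; otherwise queue a delayed
 * deletion item for @index.
 */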
1531 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1532                                    struct btrfs_root *root, struct inode *dir,
1533                                    u64 index)
1534 {
1535         struct btrfs_delayed_node *node;
1536         struct btrfs_delayed_item *item;
1537         struct btrfs_key item_key;
1538         int ret;
1539
1540         node = btrfs_get_or_create_delayed_node(dir);
1541         if (IS_ERR(node))
1542                 return PTR_ERR(node);
1543
1544         item_key.objectid = btrfs_ino(dir);
1545         item_key.type = BTRFS_DIR_INDEX_KEY;
1546         item_key.offset = index;
1547
1548         ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1549         if (!ret)
1550                 goto end;
1551
1552         item = btrfs_alloc_delayed_item(0);
1553         if (!item) {
1554                 ret = -ENOMEM;
1555                 goto end;
1556         }
1557
1558         item->key = item_key;
1559
1560         ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1561         /*
1562          * We have reserved enough space when we started a new transaction,
1563          * so a metadata reservation failure here should be impossible.
1564          */
1565         BUG_ON(ret);
1566
1567         mutex_lock(&node->mutex);
1568         ret = __btrfs_add_delayed_deletion_item(node, item);
1569         if (unlikely(ret)) {
1570                 btrfs_err(root->fs_info, "error adding delayed dir index item (index: %llu) "
1571                                 "into the deletion tree of the delayed node "
1572                                 "(root id: %llu, inode id: %llu, errno: %d)",
1573                                 index, node->root->objectid, node->inode_id,
1574                                 ret);
1575                 BUG();
1576         }
1577         mutex_unlock(&node->mutex);
1578 end:
1579         btrfs_release_delayed_node(node);
1580         return ret;
1581 }
1582
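/*
 * Copy the cached dir index counter from the delayed node into the in-memory
 * btrfs inode.  Returns -ENOENT if the inode has no delayed node and -EINVAL
 * if the delayed node has no index_cnt yet.
 */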
1583 int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1584 {
1585         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1586
1587         if (!delayed_node)
1588                 return -ENOENT;
1589
1590         /*
1591          * Since we hold the i_mutex of this directory, no new directory index
1592          * can be added to the delayed node and index_cnt cannot be updated
1593          * now.  So we needn't lock the delayed node.
1594          */
1595         if (!delayed_node->index_cnt) {
1596                 btrfs_release_delayed_node(delayed_node);
1597                 return -EINVAL;
1598         }
1599
1600         BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1601         btrfs_release_delayed_node(delayed_node);
1602         return 0;
1603 }
1604
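/*
 * Collect the pending delayed insertion and deletion items of @inode into
 * @ins_list and @del_list for readdir, taking a reference on each item.
 */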
1605 void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1606                              struct list_head *del_list)
1607 {
1608         struct btrfs_delayed_node *delayed_node;
1609         struct btrfs_delayed_item *item;
1610
1611         delayed_node = btrfs_get_delayed_node(inode);
1612         if (!delayed_node)
1613                 return;
1614
1615         mutex_lock(&delayed_node->mutex);
1616         item = __btrfs_first_delayed_insertion_item(delayed_node);
1617         while (item) {
1618                 atomic_inc(&item->refs);
1619                 list_add_tail(&item->readdir_list, ins_list);
1620                 item = __btrfs_next_delayed_item(item);
1621         }
1622
1623         item = __btrfs_first_delayed_deletion_item(delayed_node);
1624         while (item) {
1625                 atomic_inc(&item->refs);
1626                 list_add_tail(&item->readdir_list, del_list);
1627                 item = __btrfs_next_delayed_item(item);
1628         }
1629         mutex_unlock(&delayed_node->mutex);
1630         /*
1631          * This delayed node is still cached in the btrfs inode, so refs
1632          * must be > 1 now, and we needn't check whether it is going to be
1633          * freed or not.
1634          *
1635          * Besides that, this function is used to read the dir, and we do
1636          * not insert/delete delayed items in this period, so we also
1637          * needn't requeue or dequeue this delayed node.
1638          */
1639         atomic_dec(&delayed_node->refs);
1640 }
1641
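/*
 * Drop the references taken by btrfs_get_delayed_items() and free any item
 * whose refcount reaches zero.
 */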
1642 void btrfs_put_delayed_items(struct list_head *ins_list,
1643                              struct list_head *del_list)
1644 {
1645         struct btrfs_delayed_item *curr, *next;
1646
1647         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1648                 list_del(&curr->readdir_list);
1649                 if (atomic_dec_and_test(&curr->refs))
1650                         kfree(curr);
1651         }
1652
1653         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1654                 list_del(&curr->readdir_list);
1655                 if (atomic_dec_and_test(&curr->refs))
1656                         kfree(curr);
1657         }
1658 }
1659
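/*
 * Check whether the dir index @index is scheduled for delayed deletion.
 * Entries at or below @index are dropped from @del_list as the caller walks
 * the directory in index order.
 */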
1660 int btrfs_should_delete_dir_index(struct list_head *del_list,
1661                                   u64 index)
1662 {
1663         struct btrfs_delayed_item *curr, *next;
1664         int ret;
1665
1666         if (list_empty(del_list))
1667                 return 0;
1668
1669         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1670                 if (curr->key.offset > index)
1671                         break;
1672
1673                 list_del(&curr->readdir_list);
1674                 ret = (curr->key.offset == index);
1675
1676                 if (atomic_dec_and_test(&curr->refs))
1677                         kfree(curr);
1678
1679                 if (ret)
1680                         return 1;
1681                 else
1682                         continue;
1683         }
1684         return 0;
1685 }
1686
1687 /*
1688  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1689  * Emit the delayed dir index items from @ins_list to the given dir_context.
1690  */
1691 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1692                                     struct list_head *ins_list)
1693 {
1694         struct btrfs_dir_item *di;
1695         struct btrfs_delayed_item *curr, *next;
1696         struct btrfs_key location;
1697         char *name;
1698         int name_len;
1699         int over = 0;
1700         unsigned char d_type;
1701
1702         if (list_empty(ins_list))
1703                 return 0;
1704
1705         /*
1706          * The data of the delayed items cannot change, so we needn't
1707          * lock them.  And since we hold the i_mutex of the directory,
1708          * nobody can delete any directory index now.
1709          */
1710         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1711                 list_del(&curr->readdir_list);
1712
1713                 if (curr->key.offset < ctx->pos) {
1714                         if (atomic_dec_and_test(&curr->refs))
1715                                 kfree(curr);
1716                         continue;
1717                 }
1718
1719                 ctx->pos = curr->key.offset;
1720
1721                 di = (struct btrfs_dir_item *)curr->data;
1722                 name = (char *)(di + 1);
1723                 name_len = btrfs_stack_dir_name_len(di);
1724
1725                 d_type = btrfs_filetype_table[di->type];
1726                 btrfs_disk_key_to_cpu(&location, &di->location);
1727
1728                 over = !dir_emit(ctx, name, name_len,
1729                                location.objectid, d_type);
1730
1731                 if (atomic_dec_and_test(&curr->refs))
1732                         kfree(curr);
1733
1734                 if (over)
1735                         return 1;
1736         }
1737         return 0;
1738 }
1739
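/*
 * Copy the fields of the in-memory inode into the given stack
 * btrfs_inode_item, which is later written out from the delayed node.
 */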
1740 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1741                                   struct btrfs_inode_item *inode_item,
1742                                   struct inode *inode)
1743 {
1744         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1745         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1746         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1747         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1748         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1749         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1750         btrfs_set_stack_inode_generation(inode_item,
1751                                          BTRFS_I(inode)->generation);
1752         btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1753         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1754         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1755         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1756         btrfs_set_stack_inode_block_group(inode_item, 0);
1757
1758         btrfs_set_stack_timespec_sec(&inode_item->atime,
1759                                      inode->i_atime.tv_sec);
1760         btrfs_set_stack_timespec_nsec(&inode_item->atime,
1761                                       inode->i_atime.tv_nsec);
1762
1763         btrfs_set_stack_timespec_sec(&inode_item->mtime,
1764                                      inode->i_mtime.tv_sec);
1765         btrfs_set_stack_timespec_nsec(&inode_item->mtime,
1766                                       inode->i_mtime.tv_nsec);
1767
1768         btrfs_set_stack_timespec_sec(&inode_item->ctime,
1769                                      inode->i_ctime.tv_sec);
1770         btrfs_set_stack_timespec_nsec(&inode_item->ctime,
1771                                       inode->i_ctime.tv_nsec);
1772
1773         btrfs_set_stack_timespec_sec(&inode_item->otime,
1774                                      BTRFS_I(inode)->i_otime.tv_sec);
1775         btrfs_set_stack_timespec_nsec(&inode_item->otime,
1776                                      BTRFS_I(inode)->i_otime.tv_nsec);
1777 }
1778
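/*
 * Fill the VFS inode from the inode_item cached in the delayed node, if a
 * dirty delayed copy exists, so the caller can skip the btree lookup.
 * Returns -ENOENT when there is no dirty delayed copy.
 */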
1779 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1780 {
1781         struct btrfs_delayed_node *delayed_node;
1782         struct btrfs_inode_item *inode_item;
1783
1784         delayed_node = btrfs_get_delayed_node(inode);
1785         if (!delayed_node)
1786                 return -ENOENT;
1787
1788         mutex_lock(&delayed_node->mutex);
1789         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1790                 mutex_unlock(&delayed_node->mutex);
1791                 btrfs_release_delayed_node(delayed_node);
1792                 return -ENOENT;
1793         }
1794
1795         inode_item = &delayed_node->inode_item;
1796
1797         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1798         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1799         btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1800         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1801         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1802         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1803         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1804         BTRFS_I(inode)->last_trans = btrfs_stack_inode_transid(inode_item);
1805
1806         inode->i_version = btrfs_stack_inode_sequence(inode_item);
1807         inode->i_rdev = 0;
1808         *rdev = btrfs_stack_inode_rdev(inode_item);
1809         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1810
1811         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(&inode_item->atime);
1812         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->atime);
1813
1814         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
1815         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
1816
1817         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
1818         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
1819
1820         BTRFS_I(inode)->i_otime.tv_sec =
1821                 btrfs_stack_timespec_sec(&inode_item->otime);
1822         BTRFS_I(inode)->i_otime.tv_nsec =
1823                 btrfs_stack_timespec_nsec(&inode_item->otime);
1824
1825         inode->i_generation = BTRFS_I(inode)->generation;
1826         BTRFS_I(inode)->index_cnt = (u64)-1;
1827
1828         mutex_unlock(&delayed_node->mutex);
1829         btrfs_release_delayed_node(delayed_node);
1830         return 0;
1831 }
1832
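/*
 * Record an inode update in the delayed node instead of updating the inode
 * item in the btree right away.  The metadata reservation is only taken the
 * first time the node becomes INODE_DIRTY.
 */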
1833 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1834                                struct btrfs_root *root, struct inode *inode)
1835 {
1836         struct btrfs_delayed_node *delayed_node;
1837         int ret = 0;
1838
1839         delayed_node = btrfs_get_or_create_delayed_node(inode);
1840         if (IS_ERR(delayed_node))
1841                 return PTR_ERR(delayed_node);
1842
1843         mutex_lock(&delayed_node->mutex);
1844         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1845                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1846                 goto release_node;
1847         }
1848
1849         ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
1850                                                    delayed_node);
1851         if (ret)
1852                 goto release_node;
1853
1854         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1855         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1856         delayed_node->count++;
1857         atomic_inc(&root->fs_info->delayed_root->items);
1858 release_node:
1859         mutex_unlock(&delayed_node->mutex);
1860         btrfs_release_delayed_node(delayed_node);
1861         return ret;
1862 }
1863
1864 int btrfs_delayed_delete_inode_ref(struct inode *inode)
1865 {
1866         struct btrfs_delayed_node *delayed_node;
1867
1868         /*
1869          * We don't do delayed inode updates during log recovery because it
1870          * leads to enospc problems.  This means we also can't do delayed
1871          * inode refs.
1872          */
1873         if (BTRFS_I(inode)->root->fs_info->log_root_recovering)
1874                 return -EAGAIN;
1875
1876         delayed_node = btrfs_get_or_create_delayed_node(inode);
1877         if (IS_ERR(delayed_node))
1878                 return PTR_ERR(delayed_node);
1879
1880         /*
1881          * We don't reserve space for inode ref deletion because:
1882          * - We ONLY do async inode ref deletion for inodes with a single
1883          *   link (i_nlink == 1), which means there is only one inode ref.
1884          *   In most cases, the inode ref and the inode item are in the
1885          *   same leaf, and we will deal with them at the same time.
1886          *   Since we are sure we will reserve the space for the inode item,
1887          *   it is unnecessary to reserve space for inode ref deletion.
1888          * - If the inode ref and the inode item are not in the same leaf,
1889          *   we also needn't worry about the enospc problem, because we
1890          *   reserve much more space for the inode update than it needs.
1891          * - At the worst, we can steal some space from the global
1892          *   reservation.  That is very rare.
1893          */
1894         mutex_lock(&delayed_node->mutex);
1895         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1896                 goto release_node;
1897
1898         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1899         delayed_node->count++;
1900         atomic_inc(&BTRFS_I(inode)->root->fs_info->delayed_root->items);
1901 release_node:
1902         mutex_unlock(&delayed_node->mutex);
1903         btrfs_release_delayed_node(delayed_node);
1904         return 0;
1905 }
1906
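/*
 * Drop every pending insertion/deletion item, the delayed iref and the
 * delayed inode update of @delayed_node, releasing their metadata
 * reservations without writing anything to the btree.
 */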
1907 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1908 {
1909         struct btrfs_root *root = delayed_node->root;
1910         struct btrfs_delayed_item *curr_item, *prev_item;
1911
1912         mutex_lock(&delayed_node->mutex);
1913         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1914         while (curr_item) {
1915                 btrfs_delayed_item_release_metadata(root, curr_item);
1916                 prev_item = curr_item;
1917                 curr_item = __btrfs_next_delayed_item(prev_item);
1918                 btrfs_release_delayed_item(prev_item);
1919         }
1920
1921         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1922         while (curr_item) {
1923                 btrfs_delayed_item_release_metadata(root, curr_item);
1924                 prev_item = curr_item;
1925                 curr_item = __btrfs_next_delayed_item(prev_item);
1926                 btrfs_release_delayed_item(prev_item);
1927         }
1928
1929         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1930                 btrfs_release_delayed_iref(delayed_node);
1931
1932         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1933                 btrfs_delayed_inode_release_metadata(root, delayed_node);
1934                 btrfs_release_delayed_inode(delayed_node);
1935         }
1936         mutex_unlock(&delayed_node->mutex);
1937 }
1938
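/*
 * Throw away all pending delayed items and the delayed inode update of
 * @inode without committing them to the btree.
 */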
1939 void btrfs_kill_delayed_inode_items(struct inode *inode)
1940 {
1941         struct btrfs_delayed_node *delayed_node;
1942
1943         delayed_node = btrfs_get_delayed_node(inode);
1944         if (!delayed_node)
1945                 return;
1946
1947         __btrfs_kill_delayed_node(delayed_node);
1948         btrfs_release_delayed_node(delayed_node);
1949 }
1950
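/*
 * Walk root->delayed_nodes_tree in batches of 8 and kill every delayed node
 * belonging to this root.
 */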
1951 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1952 {
1953         u64 inode_id = 0;
1954         struct btrfs_delayed_node *delayed_nodes[8];
1955         int i, n;
1956
1957         while (1) {
1958                 spin_lock(&root->inode_lock);
1959                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1960                                            (void **)delayed_nodes, inode_id,
1961                                            ARRAY_SIZE(delayed_nodes));
1962                 if (!n) {
1963                         spin_unlock(&root->inode_lock);
1964                         break;
1965                 }
1966
1967                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1968
1969                 for (i = 0; i < n; i++)
1970                         atomic_inc(&delayed_nodes[i]->refs);
1971                 spin_unlock(&root->inode_lock);
1972
1973                 for (i = 0; i < n; i++) {
1974                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1975                         btrfs_release_delayed_node(delayed_nodes[i]);
1976                 }
1977         }
1978 }
1979
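/*
 * Kill every delayed node still queued on the fs-wide delayed root.
 */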
1980 void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
1981 {
1982         struct btrfs_delayed_root *delayed_root;
1983         struct btrfs_delayed_node *curr_node, *prev_node;
1984
1985         delayed_root = btrfs_get_delayed_root(root);
1986
1987         curr_node = btrfs_first_delayed_node(delayed_root);
1988         while (curr_node) {
1989                 __btrfs_kill_delayed_node(curr_node);
1990
1991                 prev_node = curr_node;
1992                 curr_node = btrfs_next_delayed_node(curr_node);
1993                 btrfs_release_delayed_node(prev_node);
1994         }
1995 }
1996