054577bddaf27869d9a524a73d4df5a76072e4e1
[cascardo/linux.git] / fs / btrfs / delayed-inode.c
1 /*
2  * Copyright (C) 2011 Fujitsu.  All rights reserved.
3  * Written by Miao Xie <miaox@cn.fujitsu.com>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public
15  * License along with this program; if not, write to the
16  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17  * Boston, MA 021110-1307, USA.
18  */
19
20 #include <linux/slab.h>
21 #include "delayed-inode.h"
22 #include "disk-io.h"
23 #include "transaction.h"
24 #include "ctree.h"
25
26 #define BTRFS_DELAYED_WRITEBACK         512
27 #define BTRFS_DELAYED_BACKGROUND        128
28 #define BTRFS_DELAYED_BATCH             16
29
30 static struct kmem_cache *delayed_node_cache;
31
32 int __init btrfs_delayed_inode_init(void)
33 {
34         delayed_node_cache = kmem_cache_create("btrfs_delayed_node",
35                                         sizeof(struct btrfs_delayed_node),
36                                         0,
37                                         SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
38                                         NULL);
39         if (!delayed_node_cache)
40                 return -ENOMEM;
41         return 0;
42 }
43
44 void btrfs_delayed_inode_exit(void)
45 {
46         if (delayed_node_cache)
47                 kmem_cache_destroy(delayed_node_cache);
48 }
49
50 static inline void btrfs_init_delayed_node(
51                                 struct btrfs_delayed_node *delayed_node,
52                                 struct btrfs_root *root, u64 inode_id)
53 {
54         delayed_node->root = root;
55         delayed_node->inode_id = inode_id;
56         atomic_set(&delayed_node->refs, 0);
57         delayed_node->count = 0;
58         delayed_node->flags = 0;
59         delayed_node->ins_root = RB_ROOT;
60         delayed_node->del_root = RB_ROOT;
61         mutex_init(&delayed_node->mutex);
62         delayed_node->index_cnt = 0;
63         INIT_LIST_HEAD(&delayed_node->n_list);
64         INIT_LIST_HEAD(&delayed_node->p_list);
65         delayed_node->bytes_reserved = 0;
66         memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
67 }
68
69 static inline int btrfs_is_continuous_delayed_item(
70                                         struct btrfs_delayed_item *item1,
71                                         struct btrfs_delayed_item *item2)
72 {
73         if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
74             item1->key.objectid == item2->key.objectid &&
75             item1->key.type == item2->key.type &&
76             item1->key.offset + 1 == item2->key.offset)
77                 return 1;
78         return 0;
79 }
80
81 static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
82                                                         struct btrfs_root *root)
83 {
84         return root->fs_info->delayed_root;
85 }
86
87 static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
88 {
89         struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
90         struct btrfs_root *root = btrfs_inode->root;
91         u64 ino = btrfs_ino(inode);
92         struct btrfs_delayed_node *node;
93
94         node = ACCESS_ONCE(btrfs_inode->delayed_node);
95         if (node) {
96                 atomic_inc(&node->refs);
97                 return node;
98         }
99
100         spin_lock(&root->inode_lock);
101         node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
102         if (node) {
103                 if (btrfs_inode->delayed_node) {
104                         atomic_inc(&node->refs);        /* can be accessed */
105                         BUG_ON(btrfs_inode->delayed_node != node);
106                         spin_unlock(&root->inode_lock);
107                         return node;
108                 }
109                 btrfs_inode->delayed_node = node;
110                 /* can be accessed and cached in the inode */
111                 atomic_add(2, &node->refs);
112                 spin_unlock(&root->inode_lock);
113                 return node;
114         }
115         spin_unlock(&root->inode_lock);
116
117         return NULL;
118 }
119
120 /* Will return either the node or PTR_ERR(-ENOMEM) */
121 static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
122                                                         struct inode *inode)
123 {
124         struct btrfs_delayed_node *node;
125         struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
126         struct btrfs_root *root = btrfs_inode->root;
127         u64 ino = btrfs_ino(inode);
128         int ret;
129
130 again:
131         node = btrfs_get_delayed_node(inode);
132         if (node)
133                 return node;
134
135         node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
136         if (!node)
137                 return ERR_PTR(-ENOMEM);
138         btrfs_init_delayed_node(node, root, ino);
139
140         /* cached in the btrfs inode and can be accessed */
141         atomic_add(2, &node->refs);
142
143         ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
144         if (ret) {
145                 kmem_cache_free(delayed_node_cache, node);
146                 return ERR_PTR(ret);
147         }
148
149         spin_lock(&root->inode_lock);
150         ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
151         if (ret == -EEXIST) {
152                 spin_unlock(&root->inode_lock);
153                 kmem_cache_free(delayed_node_cache, node);
154                 radix_tree_preload_end();
155                 goto again;
156         }
157         btrfs_inode->delayed_node = node;
158         spin_unlock(&root->inode_lock);
159         radix_tree_preload_end();
160
161         return node;
162 }
163
164 /*
165  * Call it when holding delayed_node->mutex
166  *
167  * If mod = 1, add this node into the prepared list.
168  */
169 static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
170                                      struct btrfs_delayed_node *node,
171                                      int mod)
172 {
173         spin_lock(&root->lock);
174         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
175                 if (!list_empty(&node->p_list))
176                         list_move_tail(&node->p_list, &root->prepare_list);
177                 else if (mod)
178                         list_add_tail(&node->p_list, &root->prepare_list);
179         } else {
180                 list_add_tail(&node->n_list, &root->node_list);
181                 list_add_tail(&node->p_list, &root->prepare_list);
182                 atomic_inc(&node->refs);        /* inserted into list */
183                 root->nodes++;
184                 set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
185         }
186         spin_unlock(&root->lock);
187 }
188
189 /* Call it when holding delayed_node->mutex */
190 static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
191                                        struct btrfs_delayed_node *node)
192 {
193         spin_lock(&root->lock);
194         if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
195                 root->nodes--;
196                 atomic_dec(&node->refs);        /* not in the list */
197                 list_del_init(&node->n_list);
198                 if (!list_empty(&node->p_list))
199                         list_del_init(&node->p_list);
200                 clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
201         }
202         spin_unlock(&root->lock);
203 }
204
205 static struct btrfs_delayed_node *btrfs_first_delayed_node(
206                         struct btrfs_delayed_root *delayed_root)
207 {
208         struct list_head *p;
209         struct btrfs_delayed_node *node = NULL;
210
211         spin_lock(&delayed_root->lock);
212         if (list_empty(&delayed_root->node_list))
213                 goto out;
214
215         p = delayed_root->node_list.next;
216         node = list_entry(p, struct btrfs_delayed_node, n_list);
217         atomic_inc(&node->refs);
218 out:
219         spin_unlock(&delayed_root->lock);
220
221         return node;
222 }
223
224 static struct btrfs_delayed_node *btrfs_next_delayed_node(
225                                                 struct btrfs_delayed_node *node)
226 {
227         struct btrfs_delayed_root *delayed_root;
228         struct list_head *p;
229         struct btrfs_delayed_node *next = NULL;
230
231         delayed_root = node->root->fs_info->delayed_root;
232         spin_lock(&delayed_root->lock);
233         if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
234                 /* not in the list */
235                 if (list_empty(&delayed_root->node_list))
236                         goto out;
237                 p = delayed_root->node_list.next;
238         } else if (list_is_last(&node->n_list, &delayed_root->node_list))
239                 goto out;
240         else
241                 p = node->n_list.next;
242
243         next = list_entry(p, struct btrfs_delayed_node, n_list);
244         atomic_inc(&next->refs);
245 out:
246         spin_unlock(&delayed_root->lock);
247
248         return next;
249 }
250
251 static void __btrfs_release_delayed_node(
252                                 struct btrfs_delayed_node *delayed_node,
253                                 int mod)
254 {
255         struct btrfs_delayed_root *delayed_root;
256
257         if (!delayed_node)
258                 return;
259
260         delayed_root = delayed_node->root->fs_info->delayed_root;
261
262         mutex_lock(&delayed_node->mutex);
263         if (delayed_node->count)
264                 btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
265         else
266                 btrfs_dequeue_delayed_node(delayed_root, delayed_node);
267         mutex_unlock(&delayed_node->mutex);
268
269         if (atomic_dec_and_test(&delayed_node->refs)) {
270                 bool free = false;
271                 struct btrfs_root *root = delayed_node->root;
272                 spin_lock(&root->inode_lock);
273                 if (atomic_read(&delayed_node->refs) == 0) {
274                         radix_tree_delete(&root->delayed_nodes_tree,
275                                           delayed_node->inode_id);
276                         free = true;
277                 }
278                 spin_unlock(&root->inode_lock);
279                 if (free)
280                         kmem_cache_free(delayed_node_cache, delayed_node);
281         }
282 }
283
284 static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
285 {
286         __btrfs_release_delayed_node(node, 0);
287 }
288
289 static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
290                                         struct btrfs_delayed_root *delayed_root)
291 {
292         struct list_head *p;
293         struct btrfs_delayed_node *node = NULL;
294
295         spin_lock(&delayed_root->lock);
296         if (list_empty(&delayed_root->prepare_list))
297                 goto out;
298
299         p = delayed_root->prepare_list.next;
300         list_del_init(p);
301         node = list_entry(p, struct btrfs_delayed_node, p_list);
302         atomic_inc(&node->refs);
303 out:
304         spin_unlock(&delayed_root->lock);
305
306         return node;
307 }
308
309 static inline void btrfs_release_prepared_delayed_node(
310                                         struct btrfs_delayed_node *node)
311 {
312         __btrfs_release_delayed_node(node, 1);
313 }
314
315 static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
316 {
317         struct btrfs_delayed_item *item;
318         item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
319         if (item) {
320                 item->data_len = data_len;
321                 item->ins_or_del = 0;
322                 item->bytes_reserved = 0;
323                 item->delayed_node = NULL;
324                 atomic_set(&item->refs, 1);
325         }
326         return item;
327 }
328
329 /*
330  * __btrfs_lookup_delayed_item - look up the delayed item by key
331  * @delayed_node: pointer to the delayed node
332  * @key:          the key to look up
333  * @prev:         used to store the prev item if the right item isn't found
334  * @next:         used to store the next item if the right item isn't found
335  *
336  * Note: if we don't find the right item, we will return the prev item and
337  * the next item.
338  */
339 static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
340                                 struct rb_root *root,
341                                 struct btrfs_key *key,
342                                 struct btrfs_delayed_item **prev,
343                                 struct btrfs_delayed_item **next)
344 {
345         struct rb_node *node, *prev_node = NULL;
346         struct btrfs_delayed_item *delayed_item = NULL;
347         int ret = 0;
348
349         node = root->rb_node;
350
351         while (node) {
352                 delayed_item = rb_entry(node, struct btrfs_delayed_item,
353                                         rb_node);
354                 prev_node = node;
355                 ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
356                 if (ret < 0)
357                         node = node->rb_right;
358                 else if (ret > 0)
359                         node = node->rb_left;
360                 else
361                         return delayed_item;
362         }
363
364         if (prev) {
365                 if (!prev_node)
366                         *prev = NULL;
367                 else if (ret < 0)
368                         *prev = delayed_item;
369                 else if ((node = rb_prev(prev_node)) != NULL) {
370                         *prev = rb_entry(node, struct btrfs_delayed_item,
371                                          rb_node);
372                 } else
373                         *prev = NULL;
374         }
375
376         if (next) {
377                 if (!prev_node)
378                         *next = NULL;
379                 else if (ret > 0)
380                         *next = delayed_item;
381                 else if ((node = rb_next(prev_node)) != NULL) {
382                         *next = rb_entry(node, struct btrfs_delayed_item,
383                                          rb_node);
384                 } else
385                         *next = NULL;
386         }
387         return NULL;
388 }
389
390 static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
391                                         struct btrfs_delayed_node *delayed_node,
392                                         struct btrfs_key *key)
393 {
394         struct btrfs_delayed_item *item;
395
396         item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
397                                            NULL, NULL);
398         return item;
399 }
400
401 static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
402                                     struct btrfs_delayed_item *ins,
403                                     int action)
404 {
405         struct rb_node **p, *node;
406         struct rb_node *parent_node = NULL;
407         struct rb_root *root;
408         struct btrfs_delayed_item *item;
409         int cmp;
410
411         if (action == BTRFS_DELAYED_INSERTION_ITEM)
412                 root = &delayed_node->ins_root;
413         else if (action == BTRFS_DELAYED_DELETION_ITEM)
414                 root = &delayed_node->del_root;
415         else
416                 BUG();
417         p = &root->rb_node;
418         node = &ins->rb_node;
419
420         while (*p) {
421                 parent_node = *p;
422                 item = rb_entry(parent_node, struct btrfs_delayed_item,
423                                  rb_node);
424
425                 cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
426                 if (cmp < 0)
427                         p = &(*p)->rb_right;
428                 else if (cmp > 0)
429                         p = &(*p)->rb_left;
430                 else
431                         return -EEXIST;
432         }
433
434         rb_link_node(node, parent_node, p);
435         rb_insert_color(node, root);
436         ins->delayed_node = delayed_node;
437         ins->ins_or_del = action;
438
439         if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
440             action == BTRFS_DELAYED_INSERTION_ITEM &&
441             ins->key.offset >= delayed_node->index_cnt)
442                         delayed_node->index_cnt = ins->key.offset + 1;
443
444         delayed_node->count++;
445         atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
446         return 0;
447 }
448
449 static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
450                                               struct btrfs_delayed_item *item)
451 {
452         return __btrfs_add_delayed_item(node, item,
453                                         BTRFS_DELAYED_INSERTION_ITEM);
454 }
455
456 static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
457                                              struct btrfs_delayed_item *item)
458 {
459         return __btrfs_add_delayed_item(node, item,
460                                         BTRFS_DELAYED_DELETION_ITEM);
461 }
462
463 static void finish_one_item(struct btrfs_delayed_root *delayed_root)
464 {
465         int seq = atomic_inc_return(&delayed_root->items_seq);
466         if ((atomic_dec_return(&delayed_root->items) <
467             BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
468             waitqueue_active(&delayed_root->wait))
469                 wake_up(&delayed_root->wait);
470 }
471
472 static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
473 {
474         struct rb_root *root;
475         struct btrfs_delayed_root *delayed_root;
476
477         delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
478
479         BUG_ON(!delayed_root);
480         BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
481                delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
482
483         if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
484                 root = &delayed_item->delayed_node->ins_root;
485         else
486                 root = &delayed_item->delayed_node->del_root;
487
488         rb_erase(&delayed_item->rb_node, root);
489         delayed_item->delayed_node->count--;
490
491         finish_one_item(delayed_root);
492 }
493
494 static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
495 {
496         if (item) {
497                 __btrfs_remove_delayed_item(item);
498                 if (atomic_dec_and_test(&item->refs))
499                         kfree(item);
500         }
501 }
502
503 static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
504                                         struct btrfs_delayed_node *delayed_node)
505 {
506         struct rb_node *p;
507         struct btrfs_delayed_item *item = NULL;
508
509         p = rb_first(&delayed_node->ins_root);
510         if (p)
511                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
512
513         return item;
514 }
515
516 static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
517                                         struct btrfs_delayed_node *delayed_node)
518 {
519         struct rb_node *p;
520         struct btrfs_delayed_item *item = NULL;
521
522         p = rb_first(&delayed_node->del_root);
523         if (p)
524                 item = rb_entry(p, struct btrfs_delayed_item, rb_node);
525
526         return item;
527 }
528
529 static struct btrfs_delayed_item *__btrfs_next_delayed_item(
530                                                 struct btrfs_delayed_item *item)
531 {
532         struct rb_node *p;
533         struct btrfs_delayed_item *next = NULL;
534
535         p = rb_next(&item->rb_node);
536         if (p)
537                 next = rb_entry(p, struct btrfs_delayed_item, rb_node);
538
539         return next;
540 }
541
542 static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
543                                                struct btrfs_root *root,
544                                                struct btrfs_delayed_item *item)
545 {
546         struct btrfs_block_rsv *src_rsv;
547         struct btrfs_block_rsv *dst_rsv;
548         u64 num_bytes;
549         int ret;
550
551         if (!trans->bytes_reserved)
552                 return 0;
553
554         src_rsv = trans->block_rsv;
555         dst_rsv = &root->fs_info->delayed_block_rsv;
556
557         num_bytes = btrfs_calc_trans_metadata_size(root, 1);
558         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
559         if (!ret) {
560                 trace_btrfs_space_reservation(root->fs_info, "delayed_item",
561                                               item->key.objectid,
562                                               num_bytes, 1);
563                 item->bytes_reserved = num_bytes;
564         }
565
566         return ret;
567 }
568
569 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
570                                                 struct btrfs_delayed_item *item)
571 {
572         struct btrfs_block_rsv *rsv;
573
574         if (!item->bytes_reserved)
575                 return;
576
577         rsv = &root->fs_info->delayed_block_rsv;
578         trace_btrfs_space_reservation(root->fs_info, "delayed_item",
579                                       item->key.objectid, item->bytes_reserved,
580                                       0);
581         btrfs_block_rsv_release(root, rsv,
582                                 item->bytes_reserved);
583 }
584
585 static int btrfs_delayed_inode_reserve_metadata(
586                                         struct btrfs_trans_handle *trans,
587                                         struct btrfs_root *root,
588                                         struct inode *inode,
589                                         struct btrfs_delayed_node *node)
590 {
591         struct btrfs_block_rsv *src_rsv;
592         struct btrfs_block_rsv *dst_rsv;
593         u64 num_bytes;
594         int ret;
595         bool release = false;
596
597         src_rsv = trans->block_rsv;
598         dst_rsv = &root->fs_info->delayed_block_rsv;
599
600         num_bytes = btrfs_calc_trans_metadata_size(root, 1);
601
602         /*
603          * btrfs_dirty_inode will update the inode under btrfs_join_transaction
604          * which doesn't reserve space for speed.  This is a problem since we
605          * still need to reserve space for this update, so try to reserve the
606          * space.
607          *
608          * Now if src_rsv == delalloc_block_rsv we'll let it just steal since
609          * we're accounted for.
610          */
611         if (!src_rsv || (!trans->bytes_reserved &&
612                          src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
613                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
614                                           BTRFS_RESERVE_NO_FLUSH);
615                 /*
616                  * Since we're under a transaction reserve_metadata_bytes could
617                  * try to commit the transaction which will make it return
618                  * EAGAIN to make us stop the transaction we have, so return
619                  * ENOSPC instead so that btrfs_dirty_inode knows what to do.
620                  */
621                 if (ret == -EAGAIN)
622                         ret = -ENOSPC;
623                 if (!ret) {
624                         node->bytes_reserved = num_bytes;
625                         trace_btrfs_space_reservation(root->fs_info,
626                                                       "delayed_inode",
627                                                       btrfs_ino(inode),
628                                                       num_bytes, 1);
629                 }
630                 return ret;
631         } else if (src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) {
632                 spin_lock(&BTRFS_I(inode)->lock);
633                 if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED,
634                                        &BTRFS_I(inode)->runtime_flags)) {
635                         spin_unlock(&BTRFS_I(inode)->lock);
636                         release = true;
637                         goto migrate;
638                 }
639                 spin_unlock(&BTRFS_I(inode)->lock);
640
641                 /* Ok we didn't have space pre-reserved.  This shouldn't happen
642                  * too often but it can happen if we do delalloc to an existing
643                  * inode which gets dirtied because of the time update, and then
644                  * isn't touched again until after the transaction commits and
645                  * then we try to write out the data.  First try to be nice and
646                  * reserve something strictly for us.  If not be a pain and try
647                  * to steal from the delalloc block rsv.
648                  */
649                 ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
650                                           BTRFS_RESERVE_NO_FLUSH);
651                 if (!ret)
652                         goto out;
653
654                 ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
655                 if (!WARN_ON(ret))
656                         goto out;
657
658                 /*
659                  * Ok this is a problem, let's just steal from the global rsv
660                  * since this really shouldn't happen that often.
661                  */
662                 ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
663                                               dst_rsv, num_bytes);
664                 goto out;
665         }
666
667 migrate:
668         ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
669
670 out:
671         /*
672          * Migrate only takes a reservation, it doesn't touch the size of the
673          * block_rsv.  This is to simplify people who don't normally have things
674          * migrated from their block rsv.  If they go to release their
675          * reservation, that will decrease the size as well, so if migrate
676          * reduced size we'd end up with a negative size.  But for the
677          * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
678          * but we could in fact do this reserve/migrate dance several times
679          * between the time we did the original reservation and we'd clean it
680          * up.  So to take care of this, release the space for the meta
681          * reservation here.  I think it may be time for a documentation page on
682          * how block rsvs. work.
683          */
684         if (!ret) {
685                 trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
686                                               btrfs_ino(inode), num_bytes, 1);
687                 node->bytes_reserved = num_bytes;
688         }
689
690         if (release) {
691                 trace_btrfs_space_reservation(root->fs_info, "delalloc",
692                                               btrfs_ino(inode), num_bytes, 0);
693                 btrfs_block_rsv_release(root, src_rsv, num_bytes);
694         }
695
696         return ret;
697 }
698
699 static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
700                                                 struct btrfs_delayed_node *node)
701 {
702         struct btrfs_block_rsv *rsv;
703
704         if (!node->bytes_reserved)
705                 return;
706
707         rsv = &root->fs_info->delayed_block_rsv;
708         trace_btrfs_space_reservation(root->fs_info, "delayed_inode",
709                                       node->inode_id, node->bytes_reserved, 0);
710         btrfs_block_rsv_release(root, rsv,
711                                 node->bytes_reserved);
712         node->bytes_reserved = 0;
713 }
714
715 /*
716  * This helper will insert some continuous items into the same leaf according
717  * to the free space of the leaf.
718  */
719 static int btrfs_batch_insert_items(struct btrfs_root *root,
720                                     struct btrfs_path *path,
721                                     struct btrfs_delayed_item *item)
722 {
723         struct btrfs_delayed_item *curr, *next;
724         int free_space;
725         int total_data_size = 0, total_size = 0;
726         struct extent_buffer *leaf;
727         char *data_ptr;
728         struct btrfs_key *keys;
729         u32 *data_size;
730         struct list_head head;
731         int slot;
732         int nitems;
733         int i;
734         int ret = 0;
735
736         BUG_ON(!path->nodes[0]);
737
738         leaf = path->nodes[0];
739         free_space = btrfs_leaf_free_space(root, leaf);
740         INIT_LIST_HEAD(&head);
741
742         next = item;
743         nitems = 0;
744
745         /*
746          * count the number of the continuous items that we can insert in batch
747          */
748         while (total_size + next->data_len + sizeof(struct btrfs_item) <=
749                free_space) {
750                 total_data_size += next->data_len;
751                 total_size += next->data_len + sizeof(struct btrfs_item);
752                 list_add_tail(&next->tree_list, &head);
753                 nitems++;
754
755                 curr = next;
756                 next = __btrfs_next_delayed_item(curr);
757                 if (!next)
758                         break;
759
760                 if (!btrfs_is_continuous_delayed_item(curr, next))
761                         break;
762         }
763
764         if (!nitems) {
765                 ret = 0;
766                 goto out;
767         }
768
769         /*
770          * we need allocate some memory space, but it might cause the task
771          * to sleep, so we set all locked nodes in the path to blocking locks
772          * first.
773          */
774         btrfs_set_path_blocking(path);
775
776         keys = kmalloc_array(nitems, sizeof(struct btrfs_key), GFP_NOFS);
777         if (!keys) {
778                 ret = -ENOMEM;
779                 goto out;
780         }
781
782         data_size = kmalloc_array(nitems, sizeof(u32), GFP_NOFS);
783         if (!data_size) {
784                 ret = -ENOMEM;
785                 goto error;
786         }
787
788         /* get keys of all the delayed items */
789         i = 0;
790         list_for_each_entry(next, &head, tree_list) {
791                 keys[i] = next->key;
792                 data_size[i] = next->data_len;
793                 i++;
794         }
795
796         /* reset all the locked nodes in the patch to spinning locks. */
797         btrfs_clear_path_blocking(path, NULL, 0);
798
799         /* insert the keys of the items */
800         setup_items_for_insert(root, path, keys, data_size,
801                                total_data_size, total_size, nitems);
802
803         /* insert the dir index items */
804         slot = path->slots[0];
805         list_for_each_entry_safe(curr, next, &head, tree_list) {
806                 data_ptr = btrfs_item_ptr(leaf, slot, char);
807                 write_extent_buffer(leaf, &curr->data,
808                                     (unsigned long)data_ptr,
809                                     curr->data_len);
810                 slot++;
811
812                 btrfs_delayed_item_release_metadata(root, curr);
813
814                 list_del(&curr->tree_list);
815                 btrfs_release_delayed_item(curr);
816         }
817
818 error:
819         kfree(data_size);
820         kfree(keys);
821 out:
822         return ret;
823 }
824
825 /*
826  * This helper can just do simple insertion that needn't extend item for new
827  * data, such as directory name index insertion, inode insertion.
828  */
829 static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
830                                      struct btrfs_root *root,
831                                      struct btrfs_path *path,
832                                      struct btrfs_delayed_item *delayed_item)
833 {
834         struct extent_buffer *leaf;
835         char *ptr;
836         int ret;
837
838         ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
839                                       delayed_item->data_len);
840         if (ret < 0 && ret != -EEXIST)
841                 return ret;
842
843         leaf = path->nodes[0];
844
845         ptr = btrfs_item_ptr(leaf, path->slots[0], char);
846
847         write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
848                             delayed_item->data_len);
849         btrfs_mark_buffer_dirty(leaf);
850
851         btrfs_delayed_item_release_metadata(root, delayed_item);
852         return 0;
853 }
854
855 /*
856  * we insert an item first, then if there are some continuous items, we try
857  * to insert those items into the same leaf.
858  */
859 static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
860                                       struct btrfs_path *path,
861                                       struct btrfs_root *root,
862                                       struct btrfs_delayed_node *node)
863 {
864         struct btrfs_delayed_item *curr, *prev;
865         int ret = 0;
866
867 do_again:
868         mutex_lock(&node->mutex);
869         curr = __btrfs_first_delayed_insertion_item(node);
870         if (!curr)
871                 goto insert_end;
872
873         ret = btrfs_insert_delayed_item(trans, root, path, curr);
874         if (ret < 0) {
875                 btrfs_release_path(path);
876                 goto insert_end;
877         }
878
879         prev = curr;
880         curr = __btrfs_next_delayed_item(prev);
881         if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
882                 /* insert the continuous items into the same leaf */
883                 path->slots[0]++;
884                 btrfs_batch_insert_items(root, path, curr);
885         }
886         btrfs_release_delayed_item(prev);
887         btrfs_mark_buffer_dirty(path->nodes[0]);
888
889         btrfs_release_path(path);
890         mutex_unlock(&node->mutex);
891         goto do_again;
892
893 insert_end:
894         mutex_unlock(&node->mutex);
895         return ret;
896 }
897
898 static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
899                                     struct btrfs_root *root,
900                                     struct btrfs_path *path,
901                                     struct btrfs_delayed_item *item)
902 {
903         struct btrfs_delayed_item *curr, *next;
904         struct extent_buffer *leaf;
905         struct btrfs_key key;
906         struct list_head head;
907         int nitems, i, last_item;
908         int ret = 0;
909
910         BUG_ON(!path->nodes[0]);
911
912         leaf = path->nodes[0];
913
914         i = path->slots[0];
915         last_item = btrfs_header_nritems(leaf) - 1;
916         if (i > last_item)
917                 return -ENOENT; /* FIXME: Is errno suitable? */
918
919         next = item;
920         INIT_LIST_HEAD(&head);
921         btrfs_item_key_to_cpu(leaf, &key, i);
922         nitems = 0;
923         /*
924          * count the number of the dir index items that we can delete in batch
925          */
926         while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
927                 list_add_tail(&next->tree_list, &head);
928                 nitems++;
929
930                 curr = next;
931                 next = __btrfs_next_delayed_item(curr);
932                 if (!next)
933                         break;
934
935                 if (!btrfs_is_continuous_delayed_item(curr, next))
936                         break;
937
938                 i++;
939                 if (i > last_item)
940                         break;
941                 btrfs_item_key_to_cpu(leaf, &key, i);
942         }
943
944         if (!nitems)
945                 return 0;
946
947         ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
948         if (ret)
949                 goto out;
950
951         list_for_each_entry_safe(curr, next, &head, tree_list) {
952                 btrfs_delayed_item_release_metadata(root, curr);
953                 list_del(&curr->tree_list);
954                 btrfs_release_delayed_item(curr);
955         }
956
957 out:
958         return ret;
959 }
960
961 static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
962                                       struct btrfs_path *path,
963                                       struct btrfs_root *root,
964                                       struct btrfs_delayed_node *node)
965 {
966         struct btrfs_delayed_item *curr, *prev;
967         int ret = 0;
968
969 do_again:
970         mutex_lock(&node->mutex);
971         curr = __btrfs_first_delayed_deletion_item(node);
972         if (!curr)
973                 goto delete_fail;
974
975         ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
976         if (ret < 0)
977                 goto delete_fail;
978         else if (ret > 0) {
979                 /*
980                  * can't find the item which the node points to, so this node
981                  * is invalid, just drop it.
982                  */
983                 prev = curr;
984                 curr = __btrfs_next_delayed_item(prev);
985                 btrfs_release_delayed_item(prev);
986                 ret = 0;
987                 btrfs_release_path(path);
988                 if (curr) {
989                         mutex_unlock(&node->mutex);
990                         goto do_again;
991                 } else
992                         goto delete_fail;
993         }
994
995         btrfs_batch_delete_items(trans, root, path, curr);
996         btrfs_release_path(path);
997         mutex_unlock(&node->mutex);
998         goto do_again;
999
1000 delete_fail:
1001         btrfs_release_path(path);
1002         mutex_unlock(&node->mutex);
1003         return ret;
1004 }
1005
1006 static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
1007 {
1008         struct btrfs_delayed_root *delayed_root;
1009
1010         if (delayed_node &&
1011             test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1012                 BUG_ON(!delayed_node->root);
1013                 clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1014                 delayed_node->count--;
1015
1016                 delayed_root = delayed_node->root->fs_info->delayed_root;
1017                 finish_one_item(delayed_root);
1018         }
1019 }
1020
1021 static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
1022 {
1023         struct btrfs_delayed_root *delayed_root;
1024
1025         ASSERT(delayed_node->root);
1026         clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1027         delayed_node->count--;
1028
1029         delayed_root = delayed_node->root->fs_info->delayed_root;
1030         finish_one_item(delayed_root);
1031 }
1032
1033 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1034                                         struct btrfs_root *root,
1035                                         struct btrfs_path *path,
1036                                         struct btrfs_delayed_node *node)
1037 {
1038         struct btrfs_key key;
1039         struct btrfs_inode_item *inode_item;
1040         struct extent_buffer *leaf;
1041         int mod;
1042         int ret;
1043
1044         key.objectid = node->inode_id;
1045         key.type = BTRFS_INODE_ITEM_KEY;
1046         key.offset = 0;
1047
1048         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1049                 mod = -1;
1050         else
1051                 mod = 1;
1052
1053         ret = btrfs_lookup_inode(trans, root, path, &key, mod);
1054         if (ret > 0) {
1055                 btrfs_release_path(path);
1056                 return -ENOENT;
1057         } else if (ret < 0) {
1058                 return ret;
1059         }
1060
1061         leaf = path->nodes[0];
1062         inode_item = btrfs_item_ptr(leaf, path->slots[0],
1063                                     struct btrfs_inode_item);
1064         write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
1065                             sizeof(struct btrfs_inode_item));
1066         btrfs_mark_buffer_dirty(leaf);
1067
1068         if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
1069                 goto no_iref;
1070
1071         path->slots[0]++;
1072         if (path->slots[0] >= btrfs_header_nritems(leaf))
1073                 goto search;
1074 again:
1075         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
1076         if (key.objectid != node->inode_id)
1077                 goto out;
1078
1079         if (key.type != BTRFS_INODE_REF_KEY &&
1080             key.type != BTRFS_INODE_EXTREF_KEY)
1081                 goto out;
1082
1083         /*
1084          * Delayed iref deletion is for the inode who has only one link,
1085          * so there is only one iref. The case that several irefs are
1086          * in the same item doesn't exist.
1087          */
1088         btrfs_del_item(trans, root, path);
1089 out:
1090         btrfs_release_delayed_iref(node);
1091 no_iref:
1092         btrfs_release_path(path);
1093 err_out:
1094         btrfs_delayed_inode_release_metadata(root, node);
1095         btrfs_release_delayed_inode(node);
1096
1097         return ret;
1098
1099 search:
1100         btrfs_release_path(path);
1101
1102         key.type = BTRFS_INODE_EXTREF_KEY;
1103         key.offset = -1;
1104         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1105         if (ret < 0)
1106                 goto err_out;
1107         ASSERT(ret);
1108
1109         ret = 0;
1110         leaf = path->nodes[0];
1111         path->slots[0]--;
1112         goto again;
1113 }
1114
1115 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
1116                                              struct btrfs_root *root,
1117                                              struct btrfs_path *path,
1118                                              struct btrfs_delayed_node *node)
1119 {
1120         int ret;
1121
1122         mutex_lock(&node->mutex);
1123         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
1124                 mutex_unlock(&node->mutex);
1125                 return 0;
1126         }
1127
1128         ret = __btrfs_update_delayed_inode(trans, root, path, node);
1129         mutex_unlock(&node->mutex);
1130         return ret;
1131 }
1132
1133 static inline int
1134 __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1135                                    struct btrfs_path *path,
1136                                    struct btrfs_delayed_node *node)
1137 {
1138         int ret;
1139
1140         ret = btrfs_insert_delayed_items(trans, path, node->root, node);
1141         if (ret)
1142                 return ret;
1143
1144         ret = btrfs_delete_delayed_items(trans, path, node->root, node);
1145         if (ret)
1146                 return ret;
1147
1148         ret = btrfs_update_delayed_inode(trans, node->root, path, node);
1149         return ret;
1150 }
1151
1152 /*
1153  * Called when committing the transaction.
1154  * Returns 0 on success.
1155  * Returns < 0 on error and returns with an aborted transaction with any
1156  * outstanding delayed items cleaned up.
1157  */
1158 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1159                                      struct btrfs_root *root, int nr)
1160 {
1161         struct btrfs_delayed_root *delayed_root;
1162         struct btrfs_delayed_node *curr_node, *prev_node;
1163         struct btrfs_path *path;
1164         struct btrfs_block_rsv *block_rsv;
1165         int ret = 0;
1166         bool count = (nr > 0);
1167
1168         if (trans->aborted)
1169                 return -EIO;
1170
1171         path = btrfs_alloc_path();
1172         if (!path)
1173                 return -ENOMEM;
1174         path->leave_spinning = 1;
1175
1176         block_rsv = trans->block_rsv;
1177         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1178
1179         delayed_root = btrfs_get_delayed_root(root);
1180
1181         curr_node = btrfs_first_delayed_node(delayed_root);
1182         while (curr_node && (!count || (count && nr--))) {
1183                 ret = __btrfs_commit_inode_delayed_items(trans, path,
1184                                                          curr_node);
1185                 if (ret) {
1186                         btrfs_release_delayed_node(curr_node);
1187                         curr_node = NULL;
1188                         btrfs_abort_transaction(trans, root, ret);
1189                         break;
1190                 }
1191
1192                 prev_node = curr_node;
1193                 curr_node = btrfs_next_delayed_node(curr_node);
1194                 btrfs_release_delayed_node(prev_node);
1195         }
1196
1197         if (curr_node)
1198                 btrfs_release_delayed_node(curr_node);
1199         btrfs_free_path(path);
1200         trans->block_rsv = block_rsv;
1201
1202         return ret;
1203 }
1204
1205 int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
1206                             struct btrfs_root *root)
1207 {
1208         return __btrfs_run_delayed_items(trans, root, -1);
1209 }
1210
1211 int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
1212                                struct btrfs_root *root, int nr)
1213 {
1214         return __btrfs_run_delayed_items(trans, root, nr);
1215 }
1216
1217 int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
1218                                      struct inode *inode)
1219 {
1220         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1221         struct btrfs_path *path;
1222         struct btrfs_block_rsv *block_rsv;
1223         int ret;
1224
1225         if (!delayed_node)
1226                 return 0;
1227
1228         mutex_lock(&delayed_node->mutex);
1229         if (!delayed_node->count) {
1230                 mutex_unlock(&delayed_node->mutex);
1231                 btrfs_release_delayed_node(delayed_node);
1232                 return 0;
1233         }
1234         mutex_unlock(&delayed_node->mutex);
1235
1236         path = btrfs_alloc_path();
1237         if (!path) {
1238                 btrfs_release_delayed_node(delayed_node);
1239                 return -ENOMEM;
1240         }
1241         path->leave_spinning = 1;
1242
1243         block_rsv = trans->block_rsv;
1244         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1245
1246         ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1247
1248         btrfs_release_delayed_node(delayed_node);
1249         btrfs_free_path(path);
1250         trans->block_rsv = block_rsv;
1251
1252         return ret;
1253 }
1254
1255 int btrfs_commit_inode_delayed_inode(struct inode *inode)
1256 {
1257         struct btrfs_trans_handle *trans;
1258         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1259         struct btrfs_path *path;
1260         struct btrfs_block_rsv *block_rsv;
1261         int ret;
1262
1263         if (!delayed_node)
1264                 return 0;
1265
1266         mutex_lock(&delayed_node->mutex);
1267         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1268                 mutex_unlock(&delayed_node->mutex);
1269                 btrfs_release_delayed_node(delayed_node);
1270                 return 0;
1271         }
1272         mutex_unlock(&delayed_node->mutex);
1273
1274         trans = btrfs_join_transaction(delayed_node->root);
1275         if (IS_ERR(trans)) {
1276                 ret = PTR_ERR(trans);
1277                 goto out;
1278         }
1279
1280         path = btrfs_alloc_path();
1281         if (!path) {
1282                 ret = -ENOMEM;
1283                 goto trans_out;
1284         }
1285         path->leave_spinning = 1;
1286
1287         block_rsv = trans->block_rsv;
1288         trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
1289
1290         mutex_lock(&delayed_node->mutex);
1291         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
1292                 ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
1293                                                    path, delayed_node);
1294         else
1295                 ret = 0;
1296         mutex_unlock(&delayed_node->mutex);
1297
1298         btrfs_free_path(path);
1299         trans->block_rsv = block_rsv;
1300 trans_out:
1301         btrfs_end_transaction(trans, delayed_node->root);
1302         btrfs_btree_balance_dirty(delayed_node->root);
1303 out:
1304         btrfs_release_delayed_node(delayed_node);
1305
1306         return ret;
1307 }
1308
1309 void btrfs_remove_delayed_node(struct inode *inode)
1310 {
1311         struct btrfs_delayed_node *delayed_node;
1312
1313         delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
1314         if (!delayed_node)
1315                 return;
1316
1317         BTRFS_I(inode)->delayed_node = NULL;
1318         btrfs_release_delayed_node(delayed_node);
1319 }
1320
1321 struct btrfs_async_delayed_work {
1322         struct btrfs_delayed_root *delayed_root;
1323         int nr;
1324         struct btrfs_work work;
1325 };
1326
1327 static void btrfs_async_run_delayed_root(struct btrfs_work *work)
1328 {
1329         struct btrfs_async_delayed_work *async_work;
1330         struct btrfs_delayed_root *delayed_root;
1331         struct btrfs_trans_handle *trans;
1332         struct btrfs_path *path;
1333         struct btrfs_delayed_node *delayed_node = NULL;
1334         struct btrfs_root *root;
1335         struct btrfs_block_rsv *block_rsv;
1336         int total_done = 0;
1337
1338         async_work = container_of(work, struct btrfs_async_delayed_work, work);
1339         delayed_root = async_work->delayed_root;
1340
1341         path = btrfs_alloc_path();
1342         if (!path)
1343                 goto out;
1344
1345 again:
1346         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
1347                 goto free_path;
1348
1349         delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
1350         if (!delayed_node)
1351                 goto free_path;
1352
1353         path->leave_spinning = 1;
1354         root = delayed_node->root;
1355
1356         trans = btrfs_join_transaction(root);
1357         if (IS_ERR(trans))
1358                 goto release_path;
1359
1360         block_rsv = trans->block_rsv;
1361         trans->block_rsv = &root->fs_info->delayed_block_rsv;
1362
1363         __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
1364
1365         trans->block_rsv = block_rsv;
1366         btrfs_end_transaction(trans, root);
1367         btrfs_btree_balance_dirty_nodelay(root);
1368
1369 release_path:
1370         btrfs_release_path(path);
1371         total_done++;
1372
1373         btrfs_release_prepared_delayed_node(delayed_node);
1374         if (async_work->nr == 0 || total_done < async_work->nr)
1375                 goto again;
1376
1377 free_path:
1378         btrfs_free_path(path);
1379 out:
1380         wake_up(&delayed_root->wait);
1381         kfree(async_work);
1382 }
1383
1384
1385 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
1386                                      struct btrfs_root *root, int nr)
1387 {
1388         struct btrfs_async_delayed_work *async_work;
1389
1390         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1391                 return 0;
1392
1393         async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
1394         if (!async_work)
1395                 return -ENOMEM;
1396
1397         async_work->delayed_root = delayed_root;
1398         btrfs_init_work(&async_work->work, btrfs_delayed_meta_helper,
1399                         btrfs_async_run_delayed_root, NULL, NULL);
1400         async_work->nr = nr;
1401
1402         btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
1403         return 0;
1404 }
1405
1406 void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
1407 {
1408         struct btrfs_delayed_root *delayed_root;
1409         delayed_root = btrfs_get_delayed_root(root);
1410         WARN_ON(btrfs_first_delayed_node(delayed_root));
1411 }
1412
1413 static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
1414 {
1415         int val = atomic_read(&delayed_root->items_seq);
1416
1417         if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
1418                 return 1;
1419
1420         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1421                 return 1;
1422
1423         return 0;
1424 }
1425
1426 void btrfs_balance_delayed_items(struct btrfs_root *root)
1427 {
1428         struct btrfs_delayed_root *delayed_root;
1429
1430         delayed_root = btrfs_get_delayed_root(root);
1431
1432         if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
1433                 return;
1434
1435         if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
1436                 int seq;
1437                 int ret;
1438
1439                 seq = atomic_read(&delayed_root->items_seq);
1440
1441                 ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
1442                 if (ret)
1443                         return;
1444
1445                 wait_event_interruptible(delayed_root->wait,
1446                                          could_end_wait(delayed_root, seq));
1447                 return;
1448         }
1449
1450         btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
1451 }
1452
1453 /* Will return 0 or -ENOMEM */
1454 int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
1455                                    struct btrfs_root *root, const char *name,
1456                                    int name_len, struct inode *dir,
1457                                    struct btrfs_disk_key *disk_key, u8 type,
1458                                    u64 index)
1459 {
1460         struct btrfs_delayed_node *delayed_node;
1461         struct btrfs_delayed_item *delayed_item;
1462         struct btrfs_dir_item *dir_item;
1463         int ret;
1464
1465         delayed_node = btrfs_get_or_create_delayed_node(dir);
1466         if (IS_ERR(delayed_node))
1467                 return PTR_ERR(delayed_node);
1468
1469         delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
1470         if (!delayed_item) {
1471                 ret = -ENOMEM;
1472                 goto release_node;
1473         }
1474
1475         delayed_item->key.objectid = btrfs_ino(dir);
1476         delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
1477         delayed_item->key.offset = index;
1478
1479         dir_item = (struct btrfs_dir_item *)delayed_item->data;
1480         dir_item->location = *disk_key;
1481         btrfs_set_stack_dir_transid(dir_item, trans->transid);
1482         btrfs_set_stack_dir_data_len(dir_item, 0);
1483         btrfs_set_stack_dir_name_len(dir_item, name_len);
1484         btrfs_set_stack_dir_type(dir_item, type);
1485         memcpy((char *)(dir_item + 1), name, name_len);
1486
1487         ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
1488         /*
1489          * we have reserved enough space when we start a new transaction,
1490          * so reserving metadata failure is impossible
1491          */
1492         BUG_ON(ret);
1493
1494
1495         mutex_lock(&delayed_node->mutex);
1496         ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
1497         if (unlikely(ret)) {
1498                 btrfs_err(root->fs_info, "err add delayed dir index item(name: %.*s) "
1499                                 "into the insertion tree of the delayed node"
1500                                 "(root id: %llu, inode id: %llu, errno: %d)",
1501                                 name_len, name, delayed_node->root->objectid,
1502                                 delayed_node->inode_id, ret);
1503                 BUG();
1504         }
1505         mutex_unlock(&delayed_node->mutex);
1506
1507 release_node:
1508         btrfs_release_delayed_node(delayed_node);
1509         return ret;
1510 }
1511
1512 static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
1513                                                struct btrfs_delayed_node *node,
1514                                                struct btrfs_key *key)
1515 {
1516         struct btrfs_delayed_item *item;
1517
1518         mutex_lock(&node->mutex);
1519         item = __btrfs_lookup_delayed_insertion_item(node, key);
1520         if (!item) {
1521                 mutex_unlock(&node->mutex);
1522                 return 1;
1523         }
1524
1525         btrfs_delayed_item_release_metadata(root, item);
1526         btrfs_release_delayed_item(item);
1527         mutex_unlock(&node->mutex);
1528         return 0;
1529 }
1530
1531 int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
1532                                    struct btrfs_root *root, struct inode *dir,
1533                                    u64 index)
1534 {
1535         struct btrfs_delayed_node *node;
1536         struct btrfs_delayed_item *item;
1537         struct btrfs_key item_key;
1538         int ret;
1539
1540         node = btrfs_get_or_create_delayed_node(dir);
1541         if (IS_ERR(node))
1542                 return PTR_ERR(node);
1543
1544         item_key.objectid = btrfs_ino(dir);
1545         item_key.type = BTRFS_DIR_INDEX_KEY;
1546         item_key.offset = index;
1547
1548         ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
1549         if (!ret)
1550                 goto end;
1551
1552         item = btrfs_alloc_delayed_item(0);
1553         if (!item) {
1554                 ret = -ENOMEM;
1555                 goto end;
1556         }
1557
1558         item->key = item_key;
1559
1560         ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
1561         /*
1562          * we have reserved enough space when we start a new transaction,
1563          * so reserving metadata failure is impossible.
1564          */
1565         BUG_ON(ret);
1566
1567         mutex_lock(&node->mutex);
1568         ret = __btrfs_add_delayed_deletion_item(node, item);
1569         if (unlikely(ret)) {
1570                 btrfs_err(root->fs_info, "err add delayed dir index item(index: %llu) "
1571                                 "into the deletion tree of the delayed node"
1572                                 "(root id: %llu, inode id: %llu, errno: %d)",
1573                                 index, node->root->objectid, node->inode_id,
1574                                 ret);
1575                 BUG();
1576         }
1577         mutex_unlock(&node->mutex);
1578 end:
1579         btrfs_release_delayed_node(node);
1580         return ret;
1581 }
1582
1583 int btrfs_inode_delayed_dir_index_count(struct inode *inode)
1584 {
1585         struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
1586
1587         if (!delayed_node)
1588                 return -ENOENT;
1589
1590         /*
1591          * Since we have held i_mutex of this directory, it is impossible that
1592          * a new directory index is added into the delayed node and index_cnt
1593          * is updated now. So we needn't lock the delayed node.
1594          */
1595         if (!delayed_node->index_cnt) {
1596                 btrfs_release_delayed_node(delayed_node);
1597                 return -EINVAL;
1598         }
1599
1600         BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
1601         btrfs_release_delayed_node(delayed_node);
1602         return 0;
1603 }
1604
1605 void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
1606                              struct list_head *del_list)
1607 {
1608         struct btrfs_delayed_node *delayed_node;
1609         struct btrfs_delayed_item *item;
1610
1611         delayed_node = btrfs_get_delayed_node(inode);
1612         if (!delayed_node)
1613                 return;
1614
1615         mutex_lock(&delayed_node->mutex);
1616         item = __btrfs_first_delayed_insertion_item(delayed_node);
1617         while (item) {
1618                 atomic_inc(&item->refs);
1619                 list_add_tail(&item->readdir_list, ins_list);
1620                 item = __btrfs_next_delayed_item(item);
1621         }
1622
1623         item = __btrfs_first_delayed_deletion_item(delayed_node);
1624         while (item) {
1625                 atomic_inc(&item->refs);
1626                 list_add_tail(&item->readdir_list, del_list);
1627                 item = __btrfs_next_delayed_item(item);
1628         }
1629         mutex_unlock(&delayed_node->mutex);
1630         /*
1631          * This delayed node is still cached in the btrfs inode, so refs
1632          * must be > 1 now, and we needn't check it is going to be freed
1633          * or not.
1634          *
1635          * Besides that, this function is used to read dir, we do not
1636          * insert/delete delayed items in this period. So we also needn't
1637          * requeue or dequeue this delayed node.
1638          */
1639         atomic_dec(&delayed_node->refs);
1640 }
1641
1642 void btrfs_put_delayed_items(struct list_head *ins_list,
1643                              struct list_head *del_list)
1644 {
1645         struct btrfs_delayed_item *curr, *next;
1646
1647         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1648                 list_del(&curr->readdir_list);
1649                 if (atomic_dec_and_test(&curr->refs))
1650                         kfree(curr);
1651         }
1652
1653         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1654                 list_del(&curr->readdir_list);
1655                 if (atomic_dec_and_test(&curr->refs))
1656                         kfree(curr);
1657         }
1658 }
1659
1660 int btrfs_should_delete_dir_index(struct list_head *del_list,
1661                                   u64 index)
1662 {
1663         struct btrfs_delayed_item *curr, *next;
1664         int ret;
1665
1666         if (list_empty(del_list))
1667                 return 0;
1668
1669         list_for_each_entry_safe(curr, next, del_list, readdir_list) {
1670                 if (curr->key.offset > index)
1671                         break;
1672
1673                 list_del(&curr->readdir_list);
1674                 ret = (curr->key.offset == index);
1675
1676                 if (atomic_dec_and_test(&curr->refs))
1677                         kfree(curr);
1678
1679                 if (ret)
1680                         return 1;
1681                 else
1682                         continue;
1683         }
1684         return 0;
1685 }
1686
1687 /*
1688  * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
1689  *
1690  */
1691 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
1692                                     struct list_head *ins_list)
1693 {
1694         struct btrfs_dir_item *di;
1695         struct btrfs_delayed_item *curr, *next;
1696         struct btrfs_key location;
1697         char *name;
1698         int name_len;
1699         int over = 0;
1700         unsigned char d_type;
1701
1702         if (list_empty(ins_list))
1703                 return 0;
1704
1705         /*
1706          * Changing the data of the delayed item is impossible. So
1707          * we needn't lock them. And we have held i_mutex of the
1708          * directory, nobody can delete any directory indexes now.
1709          */
1710         list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
1711                 list_del(&curr->readdir_list);
1712
1713                 if (curr->key.offset < ctx->pos) {
1714                         if (atomic_dec_and_test(&curr->refs))
1715                                 kfree(curr);
1716                         continue;
1717                 }
1718
1719                 ctx->pos = curr->key.offset;
1720
1721                 di = (struct btrfs_dir_item *)curr->data;
1722                 name = (char *)(di + 1);
1723                 name_len = btrfs_stack_dir_name_len(di);
1724
1725                 d_type = btrfs_filetype_table[di->type];
1726                 btrfs_disk_key_to_cpu(&location, &di->location);
1727
1728                 over = !dir_emit(ctx, name, name_len,
1729                                location.objectid, d_type);
1730
1731                 if (atomic_dec_and_test(&curr->refs))
1732                         kfree(curr);
1733
1734                 if (over)
1735                         return 1;
1736         }
1737         return 0;
1738 }
1739
1740 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
1741                                   struct btrfs_inode_item *inode_item,
1742                                   struct inode *inode)
1743 {
1744         btrfs_set_stack_inode_uid(inode_item, i_uid_read(inode));
1745         btrfs_set_stack_inode_gid(inode_item, i_gid_read(inode));
1746         btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
1747         btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
1748         btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
1749         btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
1750         btrfs_set_stack_inode_generation(inode_item,
1751                                          BTRFS_I(inode)->generation);
1752         btrfs_set_stack_inode_sequence(inode_item, inode->i_version);
1753         btrfs_set_stack_inode_transid(inode_item, trans->transid);
1754         btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
1755         btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
1756         btrfs_set_stack_inode_block_group(inode_item, 0);
1757
1758         btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
1759                                      inode->i_atime.tv_sec);
1760         btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
1761                                       inode->i_atime.tv_nsec);
1762
1763         btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
1764                                      inode->i_mtime.tv_sec);
1765         btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
1766                                       inode->i_mtime.tv_nsec);
1767
1768         btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
1769                                      inode->i_ctime.tv_sec);
1770         btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
1771                                       inode->i_ctime.tv_nsec);
1772 }
1773
1774 int btrfs_fill_inode(struct inode *inode, u32 *rdev)
1775 {
1776         struct btrfs_delayed_node *delayed_node;
1777         struct btrfs_inode_item *inode_item;
1778         struct btrfs_timespec *tspec;
1779
1780         delayed_node = btrfs_get_delayed_node(inode);
1781         if (!delayed_node)
1782                 return -ENOENT;
1783
1784         mutex_lock(&delayed_node->mutex);
1785         if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1786                 mutex_unlock(&delayed_node->mutex);
1787                 btrfs_release_delayed_node(delayed_node);
1788                 return -ENOENT;
1789         }
1790
1791         inode_item = &delayed_node->inode_item;
1792
1793         i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
1794         i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
1795         btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
1796         inode->i_mode = btrfs_stack_inode_mode(inode_item);
1797         set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
1798         inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
1799         BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
1800         inode->i_version = btrfs_stack_inode_sequence(inode_item);
1801         inode->i_rdev = 0;
1802         *rdev = btrfs_stack_inode_rdev(inode_item);
1803         BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
1804
1805         tspec = btrfs_inode_atime(inode_item);
1806         inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
1807         inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1808
1809         tspec = btrfs_inode_mtime(inode_item);
1810         inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
1811         inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1812
1813         tspec = btrfs_inode_ctime(inode_item);
1814         inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
1815         inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
1816
1817         inode->i_generation = BTRFS_I(inode)->generation;
1818         BTRFS_I(inode)->index_cnt = (u64)-1;
1819
1820         mutex_unlock(&delayed_node->mutex);
1821         btrfs_release_delayed_node(delayed_node);
1822         return 0;
1823 }
1824
1825 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
1826                                struct btrfs_root *root, struct inode *inode)
1827 {
1828         struct btrfs_delayed_node *delayed_node;
1829         int ret = 0;
1830
1831         delayed_node = btrfs_get_or_create_delayed_node(inode);
1832         if (IS_ERR(delayed_node))
1833                 return PTR_ERR(delayed_node);
1834
1835         mutex_lock(&delayed_node->mutex);
1836         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1837                 fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1838                 goto release_node;
1839         }
1840
1841         ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
1842                                                    delayed_node);
1843         if (ret)
1844                 goto release_node;
1845
1846         fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
1847         set_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
1848         delayed_node->count++;
1849         atomic_inc(&root->fs_info->delayed_root->items);
1850 release_node:
1851         mutex_unlock(&delayed_node->mutex);
1852         btrfs_release_delayed_node(delayed_node);
1853         return ret;
1854 }
1855
1856 int btrfs_delayed_delete_inode_ref(struct inode *inode)
1857 {
1858         struct btrfs_delayed_node *delayed_node;
1859
1860         delayed_node = btrfs_get_or_create_delayed_node(inode);
1861         if (IS_ERR(delayed_node))
1862                 return PTR_ERR(delayed_node);
1863
1864         /*
1865          * We don't reserve space for inode ref deletion is because:
1866          * - We ONLY do async inode ref deletion for the inode who has only
1867          *   one link(i_nlink == 1), it means there is only one inode ref.
1868          *   And in most case, the inode ref and the inode item are in the
1869          *   same leaf, and we will deal with them at the same time.
1870          *   Since we are sure we will reserve the space for the inode item,
1871          *   it is unnecessary to reserve space for inode ref deletion.
1872          * - If the inode ref and the inode item are not in the same leaf,
1873          *   We also needn't worry about enospc problem, because we reserve
1874          *   much more space for the inode update than it needs.
1875          * - At the worst, we can steal some space from the global reservation.
1876          *   It is very rare.
1877          */
1878         mutex_lock(&delayed_node->mutex);
1879         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1880                 goto release_node;
1881
1882         set_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
1883         delayed_node->count++;
1884         atomic_inc(&BTRFS_I(inode)->root->fs_info->delayed_root->items);
1885 release_node:
1886         mutex_unlock(&delayed_node->mutex);
1887         btrfs_release_delayed_node(delayed_node);
1888         return 0;
1889 }
1890
1891 static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
1892 {
1893         struct btrfs_root *root = delayed_node->root;
1894         struct btrfs_delayed_item *curr_item, *prev_item;
1895
1896         mutex_lock(&delayed_node->mutex);
1897         curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
1898         while (curr_item) {
1899                 btrfs_delayed_item_release_metadata(root, curr_item);
1900                 prev_item = curr_item;
1901                 curr_item = __btrfs_next_delayed_item(prev_item);
1902                 btrfs_release_delayed_item(prev_item);
1903         }
1904
1905         curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
1906         while (curr_item) {
1907                 btrfs_delayed_item_release_metadata(root, curr_item);
1908                 prev_item = curr_item;
1909                 curr_item = __btrfs_next_delayed_item(prev_item);
1910                 btrfs_release_delayed_item(prev_item);
1911         }
1912
1913         if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags))
1914                 btrfs_release_delayed_iref(delayed_node);
1915
1916         if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
1917                 btrfs_delayed_inode_release_metadata(root, delayed_node);
1918                 btrfs_release_delayed_inode(delayed_node);
1919         }
1920         mutex_unlock(&delayed_node->mutex);
1921 }
1922
1923 void btrfs_kill_delayed_inode_items(struct inode *inode)
1924 {
1925         struct btrfs_delayed_node *delayed_node;
1926
1927         delayed_node = btrfs_get_delayed_node(inode);
1928         if (!delayed_node)
1929                 return;
1930
1931         __btrfs_kill_delayed_node(delayed_node);
1932         btrfs_release_delayed_node(delayed_node);
1933 }
1934
1935 void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
1936 {
1937         u64 inode_id = 0;
1938         struct btrfs_delayed_node *delayed_nodes[8];
1939         int i, n;
1940
1941         while (1) {
1942                 spin_lock(&root->inode_lock);
1943                 n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1944                                            (void **)delayed_nodes, inode_id,
1945                                            ARRAY_SIZE(delayed_nodes));
1946                 if (!n) {
1947                         spin_unlock(&root->inode_lock);
1948                         break;
1949                 }
1950
1951                 inode_id = delayed_nodes[n - 1]->inode_id + 1;
1952
1953                 for (i = 0; i < n; i++)
1954                         atomic_inc(&delayed_nodes[i]->refs);
1955                 spin_unlock(&root->inode_lock);
1956
1957                 for (i = 0; i < n; i++) {
1958                         __btrfs_kill_delayed_node(delayed_nodes[i]);
1959                         btrfs_release_delayed_node(delayed_nodes[i]);
1960                 }
1961         }
1962 }
1963
1964 void btrfs_destroy_delayed_inodes(struct btrfs_root *root)
1965 {
1966         struct btrfs_delayed_root *delayed_root;
1967         struct btrfs_delayed_node *curr_node, *prev_node;
1968
1969         delayed_root = btrfs_get_delayed_root(root);
1970
1971         curr_node = btrfs_first_delayed_node(delayed_root);
1972         while (curr_node) {
1973                 __btrfs_kill_delayed_node(curr_node);
1974
1975                 prev_node = curr_node;
1976                 curr_node = btrfs_next_delayed_node(curr_node);
1977                 btrfs_release_delayed_node(prev_node);
1978         }
1979 }
1980