nilfs2: remove redundant pointer checks in bmap lookup functions
[cascardo/linux.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 {
36         struct nilfs_btree_path *path;
37         int level = NILFS_BTREE_LEVEL_DATA;
38
39         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
40         if (path == NULL)
41                 goto out;
42
43         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
44                 path[level].bp_bh = NULL;
45                 path[level].bp_sib_bh = NULL;
46                 path[level].bp_index = 0;
47                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
48                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
49                 path[level].bp_op = NULL;
50         }
51
52 out:
53         return path;
54 }
55
56 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
57 {
58         int level = NILFS_BTREE_LEVEL_DATA;
59
60         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
61                 brelse(path[level].bp_bh);
62
63         kmem_cache_free(nilfs_btree_path_cache, path);
64 }
65
66 /*
67  * B-tree node operations
68  */
69 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
70                                  struct buffer_head **bhp)
71 {
72         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
73         struct buffer_head *bh;
74         int err;
75
76         err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
77         if (err)
78                 return err == -EEXIST ? 0 : err;
79
80         bh = *bhp;
81         wait_on_buffer(bh);
82         if (!buffer_uptodate(bh)) {
83                 brelse(bh);
84                 return -EIO;
85         }
86         if (nilfs_btree_broken_node_block(bh)) {
87                 clear_buffer_uptodate(bh);
88                 brelse(bh);
89                 return -EINVAL;
90         }
91         return 0;
92 }
93
94 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
95                                      __u64 ptr, struct buffer_head **bhp)
96 {
97         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
98         struct buffer_head *bh;
99
100         bh = nilfs_btnode_create_block(btnc, ptr);
101         if (!bh)
102                 return -ENOMEM;
103
104         set_buffer_nilfs_volatile(bh);
105         *bhp = bh;
106         return 0;
107 }
108
109 static inline int
110 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
111 {
112         return node->bn_flags;
113 }
114
115 static inline void
116 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
117 {
118         node->bn_flags = flags;
119 }
120
121 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
122 {
123         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
124 }
125
126 static inline int
127 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
128 {
129         return node->bn_level;
130 }
131
132 static inline void
133 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
134 {
135         node->bn_level = level;
136 }
137
138 static inline int
139 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
140 {
141         return le16_to_cpu(node->bn_nchildren);
142 }
143
144 static inline void
145 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
146 {
147         node->bn_nchildren = cpu_to_le16(nchildren);
148 }
149
150 static inline int nilfs_btree_node_size(const struct nilfs_bmap *btree)
151 {
152         return 1 << btree->b_inode->i_blkbits;
153 }
154
155 static inline int
156 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
157                                const struct nilfs_bmap *btree)
158 {
159         return nilfs_btree_node_root(node) ?
160                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
161                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
162 }
163
164 static inline int
165 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
166                                const struct nilfs_bmap *btree)
167 {
168         return nilfs_btree_node_root(node) ?
169                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
170                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
171 }
172
173 static inline __le64 *
174 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
175 {
176         return (__le64 *)((char *)(node + 1) +
177                           (nilfs_btree_node_root(node) ?
178                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
179 }
180
181 static inline __le64 *
182 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
183                        const struct nilfs_bmap *btree)
184 {
185         return (__le64 *)(nilfs_btree_node_dkeys(node) +
186                           nilfs_btree_node_nchildren_max(node, btree));
187 }
188
189 static inline __u64
190 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
191 {
192         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
193 }
194
195 static inline void
196 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
197 {
198         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
199 }
200
201 static inline __u64
202 nilfs_btree_node_get_ptr(const struct nilfs_bmap *btree,
203                          const struct nilfs_btree_node *node, int index)
204 {
205         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, btree) + index));
206 }
207
208 static inline void
209 nilfs_btree_node_set_ptr(struct nilfs_bmap *btree,
210                          struct nilfs_btree_node *node, int index, __u64 ptr)
211 {
212         *(nilfs_btree_node_dptrs(node, btree) + index) = cpu_to_le64(ptr);
213 }
214
215 static void nilfs_btree_node_init(struct nilfs_bmap *btree,
216                                   struct nilfs_btree_node *node,
217                                   int flags, int level, int nchildren,
218                                   const __u64 *keys, const __u64 *ptrs)
219 {
220         __le64 *dkeys;
221         __le64 *dptrs;
222         int i;
223
224         nilfs_btree_node_set_flags(node, flags);
225         nilfs_btree_node_set_level(node, level);
226         nilfs_btree_node_set_nchildren(node, nchildren);
227
228         dkeys = nilfs_btree_node_dkeys(node);
229         dptrs = nilfs_btree_node_dptrs(node, btree);
230         for (i = 0; i < nchildren; i++) {
231                 dkeys[i] = cpu_to_le64(keys[i]);
232                 dptrs[i] = cpu_to_le64(ptrs[i]);
233         }
234 }
235
236 /* Assume the buffer heads corresponding to left and right are locked. */
237 static void nilfs_btree_node_move_left(struct nilfs_bmap *btree,
238                                        struct nilfs_btree_node *left,
239                                        struct nilfs_btree_node *right,
240                                        int n)
241 {
242         __le64 *ldkeys, *rdkeys;
243         __le64 *ldptrs, *rdptrs;
244         int lnchildren, rnchildren;
245
246         ldkeys = nilfs_btree_node_dkeys(left);
247         ldptrs = nilfs_btree_node_dptrs(left, btree);
248         lnchildren = nilfs_btree_node_get_nchildren(left);
249
250         rdkeys = nilfs_btree_node_dkeys(right);
251         rdptrs = nilfs_btree_node_dptrs(right, btree);
252         rnchildren = nilfs_btree_node_get_nchildren(right);
253
254         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
255         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
256         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
257         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
258
259         lnchildren += n;
260         rnchildren -= n;
261         nilfs_btree_node_set_nchildren(left, lnchildren);
262         nilfs_btree_node_set_nchildren(right, rnchildren);
263 }
264
265 /* Assume that the buffer heads corresponding to left and right are locked. */
266 static void nilfs_btree_node_move_right(struct nilfs_bmap *btree,
267                                         struct nilfs_btree_node *left,
268                                         struct nilfs_btree_node *right,
269                                         int n)
270 {
271         __le64 *ldkeys, *rdkeys;
272         __le64 *ldptrs, *rdptrs;
273         int lnchildren, rnchildren;
274
275         ldkeys = nilfs_btree_node_dkeys(left);
276         ldptrs = nilfs_btree_node_dptrs(left, btree);
277         lnchildren = nilfs_btree_node_get_nchildren(left);
278
279         rdkeys = nilfs_btree_node_dkeys(right);
280         rdptrs = nilfs_btree_node_dptrs(right, btree);
281         rnchildren = nilfs_btree_node_get_nchildren(right);
282
283         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
284         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
285         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
286         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
287
288         lnchildren -= n;
289         rnchildren += n;
290         nilfs_btree_node_set_nchildren(left, lnchildren);
291         nilfs_btree_node_set_nchildren(right, rnchildren);
292 }
293
294 /* Assume that the buffer head corresponding to node is locked. */
295 static void nilfs_btree_node_insert(struct nilfs_bmap *btree,
296                                     struct nilfs_btree_node *node,
297                                     __u64 key, __u64 ptr, int index)
298 {
299         __le64 *dkeys;
300         __le64 *dptrs;
301         int nchildren;
302
303         dkeys = nilfs_btree_node_dkeys(node);
304         dptrs = nilfs_btree_node_dptrs(node, btree);
305         nchildren = nilfs_btree_node_get_nchildren(node);
306         if (index < nchildren) {
307                 memmove(dkeys + index + 1, dkeys + index,
308                         (nchildren - index) * sizeof(*dkeys));
309                 memmove(dptrs + index + 1, dptrs + index,
310                         (nchildren - index) * sizeof(*dptrs));
311         }
312         dkeys[index] = cpu_to_le64(key);
313         dptrs[index] = cpu_to_le64(ptr);
314         nchildren++;
315         nilfs_btree_node_set_nchildren(node, nchildren);
316 }
317
318 /* Assume that the buffer head corresponding to node is locked. */
319 static void nilfs_btree_node_delete(struct nilfs_bmap *btree,
320                                     struct nilfs_btree_node *node,
321                                     __u64 *keyp, __u64 *ptrp, int index)
322 {
323         __u64 key;
324         __u64 ptr;
325         __le64 *dkeys;
326         __le64 *dptrs;
327         int nchildren;
328
329         dkeys = nilfs_btree_node_dkeys(node);
330         dptrs = nilfs_btree_node_dptrs(node, btree);
331         key = le64_to_cpu(dkeys[index]);
332         ptr = le64_to_cpu(dptrs[index]);
333         nchildren = nilfs_btree_node_get_nchildren(node);
334         if (keyp != NULL)
335                 *keyp = key;
336         if (ptrp != NULL)
337                 *ptrp = ptr;
338
339         if (index < nchildren - 1) {
340                 memmove(dkeys + index, dkeys + index + 1,
341                         (nchildren - index - 1) * sizeof(*dkeys));
342                 memmove(dptrs + index, dptrs + index + 1,
343                         (nchildren - index - 1) * sizeof(*dptrs));
344         }
345         nchildren--;
346         nilfs_btree_node_set_nchildren(node, nchildren);
347 }
348
349 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
350                                    __u64 key, int *indexp)
351 {
352         __u64 nkey;
353         int index, low, high, s;
354
355         /* binary search */
356         low = 0;
357         high = nilfs_btree_node_get_nchildren(node) - 1;
358         index = 0;
359         s = 0;
360         while (low <= high) {
361                 index = (low + high) / 2;
362                 nkey = nilfs_btree_node_get_key(node, index);
363                 if (nkey == key) {
364                         s = 0;
365                         goto out;
366                 } else if (nkey < key) {
367                         low = index + 1;
368                         s = -1;
369                 } else {
370                         high = index - 1;
371                         s = 1;
372                 }
373         }
374
375         /* adjust index */
376         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
377                 if (s > 0 && index > 0)
378                         index--;
379         } else if (s < 0)
380                 index++;
381
382  out:
383         *indexp = index;
384
385         return s == 0;
386 }
387
388 /**
389  * nilfs_btree_node_broken - verify consistency of btree node
390  * @node: btree node block to be examined
391  * @size: node size (in bytes)
392  * @blocknr: block number
393  *
394  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
395  */
396 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
397                                    size_t size, sector_t blocknr)
398 {
399         int level, flags, nchildren;
400         int ret = 0;
401
402         level = nilfs_btree_node_get_level(node);
403         flags = nilfs_btree_node_get_flags(node);
404         nchildren = nilfs_btree_node_get_nchildren(node);
405
406         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
407                      level >= NILFS_BTREE_LEVEL_MAX ||
408                      (flags & NILFS_BTREE_NODE_ROOT) ||
409                      nchildren < 0 ||
410                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
411                 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
412                        "level = %d, flags = 0x%x, nchildren = %d\n",
413                        (unsigned long long)blocknr, level, flags, nchildren);
414                 ret = 1;
415         }
416         return ret;
417 }
418
419 int nilfs_btree_broken_node_block(struct buffer_head *bh)
420 {
421         return nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
422                                        bh->b_size, bh->b_blocknr);
423 }
424
425 static inline struct nilfs_btree_node *
426 nilfs_btree_get_root(const struct nilfs_bmap *btree)
427 {
428         return (struct nilfs_btree_node *)btree->b_u.u_data;
429 }
430
431 static inline struct nilfs_btree_node *
432 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
433 {
434         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
435 }
436
437 static inline struct nilfs_btree_node *
438 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
439 {
440         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
441 }
442
443 static inline int nilfs_btree_height(const struct nilfs_bmap *btree)
444 {
445         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
446 }
447
448 static inline struct nilfs_btree_node *
449 nilfs_btree_get_node(const struct nilfs_bmap *btree,
450                      const struct nilfs_btree_path *path,
451                      int level)
452 {
453         return (level == nilfs_btree_height(btree) - 1) ?
454                 nilfs_btree_get_root(btree) :
455                 nilfs_btree_get_nonroot_node(path, level);
456 }
457
458 static inline int
459 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
460 {
461         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
462                 dump_stack();
463                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
464                        nilfs_btree_node_get_level(node), level);
465                 return 1;
466         }
467         return 0;
468 }
469
470 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
471                                  struct nilfs_btree_path *path,
472                                  __u64 key, __u64 *ptrp, int minlevel)
473 {
474         struct nilfs_btree_node *node;
475         __u64 ptr;
476         int level, index, found, ret;
477
478         node = nilfs_btree_get_root(btree);
479         level = nilfs_btree_node_get_level(node);
480         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
481                 return -ENOENT;
482
483         found = nilfs_btree_node_lookup(node, key, &index);
484         ptr = nilfs_btree_node_get_ptr(btree, node, index);
485         path[level].bp_bh = NULL;
486         path[level].bp_index = index;
487
488         for (level--; level >= minlevel; level--) {
489                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
490                 if (ret < 0)
491                         return ret;
492                 node = nilfs_btree_get_nonroot_node(path, level);
493                 if (nilfs_btree_bad_node(node, level))
494                         return -EINVAL;
495                 if (!found)
496                         found = nilfs_btree_node_lookup(node, key, &index);
497                 else
498                         index = 0;
499                 if (index < nilfs_btree_node_nchildren_max(node, btree))
500                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
501                 else {
502                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
503                         /* insert */
504                         ptr = NILFS_BMAP_INVALID_PTR;
505                 }
506                 path[level].bp_index = index;
507         }
508         if (!found)
509                 return -ENOENT;
510
511         if (ptrp != NULL)
512                 *ptrp = ptr;
513
514         return 0;
515 }
516
517 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
518                                       struct nilfs_btree_path *path,
519                                       __u64 *keyp, __u64 *ptrp)
520 {
521         struct nilfs_btree_node *node;
522         __u64 ptr;
523         int index, level, ret;
524
525         node = nilfs_btree_get_root(btree);
526         index = nilfs_btree_node_get_nchildren(node) - 1;
527         if (index < 0)
528                 return -ENOENT;
529         level = nilfs_btree_node_get_level(node);
530         ptr = nilfs_btree_node_get_ptr(btree, node, index);
531         path[level].bp_bh = NULL;
532         path[level].bp_index = index;
533
534         for (level--; level > 0; level--) {
535                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
536                 if (ret < 0)
537                         return ret;
538                 node = nilfs_btree_get_nonroot_node(path, level);
539                 if (nilfs_btree_bad_node(node, level))
540                         return -EINVAL;
541                 index = nilfs_btree_node_get_nchildren(node) - 1;
542                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
543                 path[level].bp_index = index;
544         }
545
546         if (keyp != NULL)
547                 *keyp = nilfs_btree_node_get_key(node, index);
548         if (ptrp != NULL)
549                 *ptrp = ptr;
550
551         return 0;
552 }
553
554 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
555                               __u64 key, int level, __u64 *ptrp)
556 {
557         struct nilfs_btree_path *path;
558         int ret;
559
560         path = nilfs_btree_alloc_path();
561         if (path == NULL)
562                 return -ENOMEM;
563
564         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level);
565
566         nilfs_btree_free_path(path);
567
568         return ret;
569 }
570
571 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
572                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
573 {
574         struct nilfs_btree_path *path;
575         struct nilfs_btree_node *node;
576         struct inode *dat = NULL;
577         __u64 ptr, ptr2;
578         sector_t blocknr;
579         int level = NILFS_BTREE_LEVEL_NODE_MIN;
580         int ret, cnt, index, maxlevel;
581
582         path = nilfs_btree_alloc_path();
583         if (path == NULL)
584                 return -ENOMEM;
585
586         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
587         if (ret < 0)
588                 goto out;
589
590         if (NILFS_BMAP_USE_VBN(btree)) {
591                 dat = nilfs_bmap_get_dat(btree);
592                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
593                 if (ret < 0)
594                         goto out;
595                 ptr = blocknr;
596         }
597         cnt = 1;
598         if (cnt == maxblocks)
599                 goto end;
600
601         maxlevel = nilfs_btree_height(btree) - 1;
602         node = nilfs_btree_get_node(btree, path, level);
603         index = path[level].bp_index + 1;
604         for (;;) {
605                 while (index < nilfs_btree_node_get_nchildren(node)) {
606                         if (nilfs_btree_node_get_key(node, index) !=
607                             key + cnt)
608                                 goto end;
609                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
610                         if (dat) {
611                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
612                                 if (ret < 0)
613                                         goto out;
614                                 ptr2 = blocknr;
615                         }
616                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
617                                 goto end;
618                         index++;
619                         continue;
620                 }
621                 if (level == maxlevel)
622                         break;
623
624                 /* look-up right sibling node */
625                 node = nilfs_btree_get_node(btree, path, level + 1);
626                 index = path[level + 1].bp_index + 1;
627                 if (index >= nilfs_btree_node_get_nchildren(node) ||
628                     nilfs_btree_node_get_key(node, index) != key + cnt)
629                         break;
630                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
631                 path[level + 1].bp_index = index;
632
633                 brelse(path[level].bp_bh);
634                 path[level].bp_bh = NULL;
635                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
636                 if (ret < 0)
637                         goto out;
638                 node = nilfs_btree_get_nonroot_node(path, level);
639                 index = 0;
640                 path[level].bp_index = index;
641         }
642  end:
643         *ptrp = ptr;
644         ret = cnt;
645  out:
646         nilfs_btree_free_path(path);
647         return ret;
648 }
649
650 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
651                                     struct nilfs_btree_path *path,
652                                     int level, __u64 key)
653 {
654         if (level < nilfs_btree_height(btree) - 1) {
655                 do {
656                         nilfs_btree_node_set_key(
657                                 nilfs_btree_get_nonroot_node(path, level),
658                                 path[level].bp_index, key);
659                         if (!buffer_dirty(path[level].bp_bh))
660                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
661                 } while ((path[level].bp_index == 0) &&
662                          (++level < nilfs_btree_height(btree) - 1));
663         }
664
665         /* root */
666         if (level == nilfs_btree_height(btree) - 1) {
667                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
668                                          path[level].bp_index, key);
669         }
670 }
671
672 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
673                                   struct nilfs_btree_path *path,
674                                   int level, __u64 *keyp, __u64 *ptrp)
675 {
676         struct nilfs_btree_node *node;
677
678         if (level < nilfs_btree_height(btree) - 1) {
679                 node = nilfs_btree_get_nonroot_node(path, level);
680                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
681                                         path[level].bp_index);
682                 if (!buffer_dirty(path[level].bp_bh))
683                         nilfs_btnode_mark_dirty(path[level].bp_bh);
684
685                 if (path[level].bp_index == 0)
686                         nilfs_btree_promote_key(btree, path, level + 1,
687                                                 nilfs_btree_node_get_key(node,
688                                                                          0));
689         } else {
690                 node = nilfs_btree_get_root(btree);
691                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
692                                         path[level].bp_index);
693         }
694 }
695
696 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
697                                    struct nilfs_btree_path *path,
698                                    int level, __u64 *keyp, __u64 *ptrp)
699 {
700         struct nilfs_btree_node *node, *left;
701         int nchildren, lnchildren, n, move;
702
703         node = nilfs_btree_get_nonroot_node(path, level);
704         left = nilfs_btree_get_sib_node(path, level);
705         nchildren = nilfs_btree_node_get_nchildren(node);
706         lnchildren = nilfs_btree_node_get_nchildren(left);
707         move = 0;
708
709         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
710         if (n > path[level].bp_index) {
711                 /* move insert point */
712                 n--;
713                 move = 1;
714         }
715
716         nilfs_btree_node_move_left(btree, left, node, n);
717
718         if (!buffer_dirty(path[level].bp_bh))
719                 nilfs_btnode_mark_dirty(path[level].bp_bh);
720         if (!buffer_dirty(path[level].bp_sib_bh))
721                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
722
723         nilfs_btree_promote_key(btree, path, level + 1,
724                                 nilfs_btree_node_get_key(node, 0));
725
726         if (move) {
727                 brelse(path[level].bp_bh);
728                 path[level].bp_bh = path[level].bp_sib_bh;
729                 path[level].bp_sib_bh = NULL;
730                 path[level].bp_index += lnchildren;
731                 path[level + 1].bp_index--;
732         } else {
733                 brelse(path[level].bp_sib_bh);
734                 path[level].bp_sib_bh = NULL;
735                 path[level].bp_index -= n;
736         }
737
738         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
739 }
740
741 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
742                                     struct nilfs_btree_path *path,
743                                     int level, __u64 *keyp, __u64 *ptrp)
744 {
745         struct nilfs_btree_node *node, *right;
746         int nchildren, rnchildren, n, move;
747
748         node = nilfs_btree_get_nonroot_node(path, level);
749         right = nilfs_btree_get_sib_node(path, level);
750         nchildren = nilfs_btree_node_get_nchildren(node);
751         rnchildren = nilfs_btree_node_get_nchildren(right);
752         move = 0;
753
754         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
755         if (n > nchildren - path[level].bp_index) {
756                 /* move insert point */
757                 n--;
758                 move = 1;
759         }
760
761         nilfs_btree_node_move_right(btree, node, right, n);
762
763         if (!buffer_dirty(path[level].bp_bh))
764                 nilfs_btnode_mark_dirty(path[level].bp_bh);
765         if (!buffer_dirty(path[level].bp_sib_bh))
766                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
767
768         path[level + 1].bp_index++;
769         nilfs_btree_promote_key(btree, path, level + 1,
770                                 nilfs_btree_node_get_key(right, 0));
771         path[level + 1].bp_index--;
772
773         if (move) {
774                 brelse(path[level].bp_bh);
775                 path[level].bp_bh = path[level].bp_sib_bh;
776                 path[level].bp_sib_bh = NULL;
777                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
778                 path[level + 1].bp_index++;
779         } else {
780                 brelse(path[level].bp_sib_bh);
781                 path[level].bp_sib_bh = NULL;
782         }
783
784         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
785 }
786
787 static void nilfs_btree_split(struct nilfs_bmap *btree,
788                               struct nilfs_btree_path *path,
789                               int level, __u64 *keyp, __u64 *ptrp)
790 {
791         struct nilfs_btree_node *node, *right;
792         __u64 newkey;
793         __u64 newptr;
794         int nchildren, n, move;
795
796         node = nilfs_btree_get_nonroot_node(path, level);
797         right = nilfs_btree_get_sib_node(path, level);
798         nchildren = nilfs_btree_node_get_nchildren(node);
799         move = 0;
800
801         n = (nchildren + 1) / 2;
802         if (n > nchildren - path[level].bp_index) {
803                 n--;
804                 move = 1;
805         }
806
807         nilfs_btree_node_move_right(btree, node, right, n);
808
809         if (!buffer_dirty(path[level].bp_bh))
810                 nilfs_btnode_mark_dirty(path[level].bp_bh);
811         if (!buffer_dirty(path[level].bp_sib_bh))
812                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
813
814         newkey = nilfs_btree_node_get_key(right, 0);
815         newptr = path[level].bp_newreq.bpr_ptr;
816
817         if (move) {
818                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
819                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
820                                         path[level].bp_index);
821
822                 *keyp = nilfs_btree_node_get_key(right, 0);
823                 *ptrp = path[level].bp_newreq.bpr_ptr;
824
825                 brelse(path[level].bp_bh);
826                 path[level].bp_bh = path[level].bp_sib_bh;
827                 path[level].bp_sib_bh = NULL;
828         } else {
829                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
830
831                 *keyp = nilfs_btree_node_get_key(right, 0);
832                 *ptrp = path[level].bp_newreq.bpr_ptr;
833
834                 brelse(path[level].bp_sib_bh);
835                 path[level].bp_sib_bh = NULL;
836         }
837
838         path[level + 1].bp_index++;
839 }
840
841 static void nilfs_btree_grow(struct nilfs_bmap *btree,
842                              struct nilfs_btree_path *path,
843                              int level, __u64 *keyp, __u64 *ptrp)
844 {
845         struct nilfs_btree_node *root, *child;
846         int n;
847
848         root = nilfs_btree_get_root(btree);
849         child = nilfs_btree_get_sib_node(path, level);
850
851         n = nilfs_btree_node_get_nchildren(root);
852
853         nilfs_btree_node_move_right(btree, root, child, n);
854         nilfs_btree_node_set_level(root, level + 1);
855
856         if (!buffer_dirty(path[level].bp_sib_bh))
857                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
858
859         path[level].bp_bh = path[level].bp_sib_bh;
860         path[level].bp_sib_bh = NULL;
861
862         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
863
864         *keyp = nilfs_btree_node_get_key(child, 0);
865         *ptrp = path[level].bp_newreq.bpr_ptr;
866 }
867
868 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
869                                    const struct nilfs_btree_path *path)
870 {
871         struct nilfs_btree_node *node;
872         int level;
873
874         if (path == NULL)
875                 return NILFS_BMAP_INVALID_PTR;
876
877         /* left sibling */
878         level = NILFS_BTREE_LEVEL_NODE_MIN;
879         if (path[level].bp_index > 0) {
880                 node = nilfs_btree_get_node(btree, path, level);
881                 return nilfs_btree_node_get_ptr(btree, node,
882                                                 path[level].bp_index - 1);
883         }
884
885         /* parent */
886         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
887         if (level <= nilfs_btree_height(btree) - 1) {
888                 node = nilfs_btree_get_node(btree, path, level);
889                 return nilfs_btree_node_get_ptr(btree, node,
890                                                 path[level].bp_index);
891         }
892
893         return NILFS_BMAP_INVALID_PTR;
894 }
895
896 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
897                                        const struct nilfs_btree_path *path,
898                                        __u64 key)
899 {
900         __u64 ptr;
901
902         ptr = nilfs_bmap_find_target_seq(btree, key);
903         if (ptr != NILFS_BMAP_INVALID_PTR)
904                 /* sequential access */
905                 return ptr;
906         else {
907                 ptr = nilfs_btree_find_near(btree, path);
908                 if (ptr != NILFS_BMAP_INVALID_PTR)
909                         /* near */
910                         return ptr;
911         }
912         /* block group */
913         return nilfs_bmap_find_target_in_group(btree);
914 }
915
916 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
917                                       struct nilfs_btree_path *path,
918                                       int *levelp, __u64 key, __u64 ptr,
919                                       struct nilfs_bmap_stats *stats)
920 {
921         struct buffer_head *bh;
922         struct nilfs_btree_node *node, *parent, *sib;
923         __u64 sibptr;
924         int pindex, level, ret;
925         struct inode *dat = NULL;
926
927         stats->bs_nblocks = 0;
928         level = NILFS_BTREE_LEVEL_DATA;
929
930         /* allocate a new ptr for data block */
931         if (NILFS_BMAP_USE_VBN(btree)) {
932                 path[level].bp_newreq.bpr_ptr =
933                         nilfs_btree_find_target_v(btree, path, key);
934                 dat = nilfs_bmap_get_dat(btree);
935         }
936
937         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
938         if (ret < 0)
939                 goto err_out_data;
940
941         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
942              level < nilfs_btree_height(btree) - 1;
943              level++) {
944                 node = nilfs_btree_get_nonroot_node(path, level);
945                 if (nilfs_btree_node_get_nchildren(node) <
946                     nilfs_btree_node_nchildren_max(node, btree)) {
947                         path[level].bp_op = nilfs_btree_do_insert;
948                         stats->bs_nblocks++;
949                         goto out;
950                 }
951
952                 parent = nilfs_btree_get_node(btree, path, level + 1);
953                 pindex = path[level + 1].bp_index;
954
955                 /* left sibling */
956                 if (pindex > 0) {
957                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
958                                                           pindex - 1);
959                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
960                         if (ret < 0)
961                                 goto err_out_child_node;
962                         sib = (struct nilfs_btree_node *)bh->b_data;
963                         if (nilfs_btree_node_get_nchildren(sib) <
964                             nilfs_btree_node_nchildren_max(sib, btree)) {
965                                 path[level].bp_sib_bh = bh;
966                                 path[level].bp_op = nilfs_btree_carry_left;
967                                 stats->bs_nblocks++;
968                                 goto out;
969                         } else
970                                 brelse(bh);
971                 }
972
973                 /* right sibling */
974                 if (pindex <
975                     nilfs_btree_node_get_nchildren(parent) - 1) {
976                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
977                                                           pindex + 1);
978                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
979                         if (ret < 0)
980                                 goto err_out_child_node;
981                         sib = (struct nilfs_btree_node *)bh->b_data;
982                         if (nilfs_btree_node_get_nchildren(sib) <
983                             nilfs_btree_node_nchildren_max(sib, btree)) {
984                                 path[level].bp_sib_bh = bh;
985                                 path[level].bp_op = nilfs_btree_carry_right;
986                                 stats->bs_nblocks++;
987                                 goto out;
988                         } else
989                                 brelse(bh);
990                 }
991
992                 /* split */
993                 path[level].bp_newreq.bpr_ptr =
994                         path[level - 1].bp_newreq.bpr_ptr + 1;
995                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
996                                                    &path[level].bp_newreq, dat);
997                 if (ret < 0)
998                         goto err_out_child_node;
999                 ret = nilfs_btree_get_new_block(btree,
1000                                                 path[level].bp_newreq.bpr_ptr,
1001                                                 &bh);
1002                 if (ret < 0)
1003                         goto err_out_curr_node;
1004
1005                 stats->bs_nblocks++;
1006
1007                 nilfs_btree_node_init(btree,
1008                                       (struct nilfs_btree_node *)bh->b_data,
1009                                       0, level, 0, NULL, NULL);
1010                 path[level].bp_sib_bh = bh;
1011                 path[level].bp_op = nilfs_btree_split;
1012         }
1013
1014         /* root */
1015         node = nilfs_btree_get_root(btree);
1016         if (nilfs_btree_node_get_nchildren(node) <
1017             nilfs_btree_node_nchildren_max(node, btree)) {
1018                 path[level].bp_op = nilfs_btree_do_insert;
1019                 stats->bs_nblocks++;
1020                 goto out;
1021         }
1022
1023         /* grow */
1024         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1025         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1026         if (ret < 0)
1027                 goto err_out_child_node;
1028         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1029                                         &bh);
1030         if (ret < 0)
1031                 goto err_out_curr_node;
1032
1033         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1034                               0, level, 0, NULL, NULL);
1035         path[level].bp_sib_bh = bh;
1036         path[level].bp_op = nilfs_btree_grow;
1037
1038         level++;
1039         path[level].bp_op = nilfs_btree_do_insert;
1040
1041         /* a newly-created node block and a data block are added */
1042         stats->bs_nblocks += 2;
1043
1044         /* success */
1045  out:
1046         *levelp = level;
1047         return ret;
1048
1049         /* error */
1050  err_out_curr_node:
1051         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1052  err_out_child_node:
1053         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1054                 nilfs_btnode_delete(path[level].bp_sib_bh);
1055                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1056
1057         }
1058
1059         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1060  err_out_data:
1061         *levelp = level;
1062         stats->bs_nblocks = 0;
1063         return ret;
1064 }
1065
1066 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1067                                       struct nilfs_btree_path *path,
1068                                       int maxlevel, __u64 key, __u64 ptr)
1069 {
1070         struct inode *dat = NULL;
1071         int level;
1072
1073         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1074         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1075         if (NILFS_BMAP_USE_VBN(btree)) {
1076                 nilfs_bmap_set_target_v(btree, key, ptr);
1077                 dat = nilfs_bmap_get_dat(btree);
1078         }
1079
1080         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1081                 nilfs_bmap_commit_alloc_ptr(btree,
1082                                             &path[level - 1].bp_newreq, dat);
1083                 path[level].bp_op(btree, path, level, &key, &ptr);
1084         }
1085
1086         if (!nilfs_bmap_dirty(btree))
1087                 nilfs_bmap_set_dirty(btree);
1088 }
1089
1090 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1091 {
1092         struct nilfs_btree_path *path;
1093         struct nilfs_bmap_stats stats;
1094         int level, ret;
1095
1096         path = nilfs_btree_alloc_path();
1097         if (path == NULL)
1098                 return -ENOMEM;
1099
1100         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1101                                     NILFS_BTREE_LEVEL_NODE_MIN);
1102         if (ret != -ENOENT) {
1103                 if (ret == 0)
1104                         ret = -EEXIST;
1105                 goto out;
1106         }
1107
1108         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1109         if (ret < 0)
1110                 goto out;
1111         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1112         nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
1113
1114  out:
1115         nilfs_btree_free_path(path);
1116         return ret;
1117 }
1118
1119 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1120                                   struct nilfs_btree_path *path,
1121                                   int level, __u64 *keyp, __u64 *ptrp)
1122 {
1123         struct nilfs_btree_node *node;
1124
1125         if (level < nilfs_btree_height(btree) - 1) {
1126                 node = nilfs_btree_get_nonroot_node(path, level);
1127                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1128                                         path[level].bp_index);
1129                 if (!buffer_dirty(path[level].bp_bh))
1130                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1131                 if (path[level].bp_index == 0)
1132                         nilfs_btree_promote_key(btree, path, level + 1,
1133                                 nilfs_btree_node_get_key(node, 0));
1134         } else {
1135                 node = nilfs_btree_get_root(btree);
1136                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1137                                         path[level].bp_index);
1138         }
1139 }
1140
1141 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1142                                     struct nilfs_btree_path *path,
1143                                     int level, __u64 *keyp, __u64 *ptrp)
1144 {
1145         struct nilfs_btree_node *node, *left;
1146         int nchildren, lnchildren, n;
1147
1148         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1149
1150         node = nilfs_btree_get_nonroot_node(path, level);
1151         left = nilfs_btree_get_sib_node(path, level);
1152         nchildren = nilfs_btree_node_get_nchildren(node);
1153         lnchildren = nilfs_btree_node_get_nchildren(left);
1154
1155         n = (nchildren + lnchildren) / 2 - nchildren;
1156
1157         nilfs_btree_node_move_right(btree, left, node, n);
1158
1159         if (!buffer_dirty(path[level].bp_bh))
1160                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1161         if (!buffer_dirty(path[level].bp_sib_bh))
1162                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1163
1164         nilfs_btree_promote_key(btree, path, level + 1,
1165                                 nilfs_btree_node_get_key(node, 0));
1166
1167         brelse(path[level].bp_sib_bh);
1168         path[level].bp_sib_bh = NULL;
1169         path[level].bp_index += n;
1170 }
1171
1172 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1173                                      struct nilfs_btree_path *path,
1174                                      int level, __u64 *keyp, __u64 *ptrp)
1175 {
1176         struct nilfs_btree_node *node, *right;
1177         int nchildren, rnchildren, n;
1178
1179         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1180
1181         node = nilfs_btree_get_nonroot_node(path, level);
1182         right = nilfs_btree_get_sib_node(path, level);
1183         nchildren = nilfs_btree_node_get_nchildren(node);
1184         rnchildren = nilfs_btree_node_get_nchildren(right);
1185
1186         n = (nchildren + rnchildren) / 2 - nchildren;
1187
1188         nilfs_btree_node_move_left(btree, node, right, n);
1189
1190         if (!buffer_dirty(path[level].bp_bh))
1191                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1192         if (!buffer_dirty(path[level].bp_sib_bh))
1193                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1194
1195         path[level + 1].bp_index++;
1196         nilfs_btree_promote_key(btree, path, level + 1,
1197                                 nilfs_btree_node_get_key(right, 0));
1198         path[level + 1].bp_index--;
1199
1200         brelse(path[level].bp_sib_bh);
1201         path[level].bp_sib_bh = NULL;
1202 }
1203
1204 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1205                                     struct nilfs_btree_path *path,
1206                                     int level, __u64 *keyp, __u64 *ptrp)
1207 {
1208         struct nilfs_btree_node *node, *left;
1209         int n;
1210
1211         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1212
1213         node = nilfs_btree_get_nonroot_node(path, level);
1214         left = nilfs_btree_get_sib_node(path, level);
1215
1216         n = nilfs_btree_node_get_nchildren(node);
1217
1218         nilfs_btree_node_move_left(btree, left, node, n);
1219
1220         if (!buffer_dirty(path[level].bp_sib_bh))
1221                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1222
1223         nilfs_btnode_delete(path[level].bp_bh);
1224         path[level].bp_bh = path[level].bp_sib_bh;
1225         path[level].bp_sib_bh = NULL;
1226         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1227 }
1228
1229 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1230                                      struct nilfs_btree_path *path,
1231                                      int level, __u64 *keyp, __u64 *ptrp)
1232 {
1233         struct nilfs_btree_node *node, *right;
1234         int n;
1235
1236         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1237
1238         node = nilfs_btree_get_nonroot_node(path, level);
1239         right = nilfs_btree_get_sib_node(path, level);
1240
1241         n = nilfs_btree_node_get_nchildren(right);
1242
1243         nilfs_btree_node_move_left(btree, node, right, n);
1244
1245         if (!buffer_dirty(path[level].bp_bh))
1246                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1247
1248         nilfs_btnode_delete(path[level].bp_sib_bh);
1249         path[level].bp_sib_bh = NULL;
1250         path[level + 1].bp_index++;
1251 }
1252
1253 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1254                                struct nilfs_btree_path *path,
1255                                int level, __u64 *keyp, __u64 *ptrp)
1256 {
1257         struct nilfs_btree_node *root, *child;
1258         int n;
1259
1260         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1261
1262         root = nilfs_btree_get_root(btree);
1263         child = nilfs_btree_get_nonroot_node(path, level);
1264
1265         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1266         nilfs_btree_node_set_level(root, level);
1267         n = nilfs_btree_node_get_nchildren(child);
1268         nilfs_btree_node_move_left(btree, root, child, n);
1269
1270         nilfs_btnode_delete(path[level].bp_bh);
1271         path[level].bp_bh = NULL;
1272 }
1273
1274
1275 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1276                                       struct nilfs_btree_path *path,
1277                                       int *levelp,
1278                                       struct nilfs_bmap_stats *stats,
1279                                       struct inode *dat)
1280 {
1281         struct buffer_head *bh;
1282         struct nilfs_btree_node *node, *parent, *sib;
1283         __u64 sibptr;
1284         int pindex, level, ret;
1285
1286         ret = 0;
1287         stats->bs_nblocks = 0;
1288         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1289              level < nilfs_btree_height(btree) - 1;
1290              level++) {
1291                 node = nilfs_btree_get_nonroot_node(path, level);
1292                 path[level].bp_oldreq.bpr_ptr =
1293                         nilfs_btree_node_get_ptr(btree, node,
1294                                                  path[level].bp_index);
1295                 ret = nilfs_bmap_prepare_end_ptr(btree,
1296                                                  &path[level].bp_oldreq, dat);
1297                 if (ret < 0)
1298                         goto err_out_child_node;
1299
1300                 if (nilfs_btree_node_get_nchildren(node) >
1301                     nilfs_btree_node_nchildren_min(node, btree)) {
1302                         path[level].bp_op = nilfs_btree_do_delete;
1303                         stats->bs_nblocks++;
1304                         goto out;
1305                 }
1306
1307                 parent = nilfs_btree_get_node(btree, path, level + 1);
1308                 pindex = path[level + 1].bp_index;
1309
1310                 if (pindex > 0) {
1311                         /* left sibling */
1312                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1313                                                           pindex - 1);
1314                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1315                         if (ret < 0)
1316                                 goto err_out_curr_node;
1317                         sib = (struct nilfs_btree_node *)bh->b_data;
1318                         if (nilfs_btree_node_get_nchildren(sib) >
1319                             nilfs_btree_node_nchildren_min(sib, btree)) {
1320                                 path[level].bp_sib_bh = bh;
1321                                 path[level].bp_op = nilfs_btree_borrow_left;
1322                                 stats->bs_nblocks++;
1323                                 goto out;
1324                         } else {
1325                                 path[level].bp_sib_bh = bh;
1326                                 path[level].bp_op = nilfs_btree_concat_left;
1327                                 stats->bs_nblocks++;
1328                                 /* continue; */
1329                         }
1330                 } else if (pindex <
1331                            nilfs_btree_node_get_nchildren(parent) - 1) {
1332                         /* right sibling */
1333                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1334                                                           pindex + 1);
1335                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1336                         if (ret < 0)
1337                                 goto err_out_curr_node;
1338                         sib = (struct nilfs_btree_node *)bh->b_data;
1339                         if (nilfs_btree_node_get_nchildren(sib) >
1340                             nilfs_btree_node_nchildren_min(sib, btree)) {
1341                                 path[level].bp_sib_bh = bh;
1342                                 path[level].bp_op = nilfs_btree_borrow_right;
1343                                 stats->bs_nblocks++;
1344                                 goto out;
1345                         } else {
1346                                 path[level].bp_sib_bh = bh;
1347                                 path[level].bp_op = nilfs_btree_concat_right;
1348                                 stats->bs_nblocks++;
1349                                 /* continue; */
1350                         }
1351                 } else {
1352                         /* no siblings */
1353                         /* the only child of the root node */
1354                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1355                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1356                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1357                                 path[level].bp_op = nilfs_btree_shrink;
1358                                 stats->bs_nblocks += 2;
1359                         } else {
1360                                 path[level].bp_op = nilfs_btree_do_delete;
1361                                 stats->bs_nblocks++;
1362                         }
1363
1364                         goto out;
1365
1366                 }
1367         }
1368
1369         node = nilfs_btree_get_root(btree);
1370         path[level].bp_oldreq.bpr_ptr =
1371                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1372
1373         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1374         if (ret < 0)
1375                 goto err_out_child_node;
1376
1377         /* child of the root node is deleted */
1378         path[level].bp_op = nilfs_btree_do_delete;
1379         stats->bs_nblocks++;
1380
1381         /* success */
1382  out:
1383         *levelp = level;
1384         return ret;
1385
1386         /* error */
1387  err_out_curr_node:
1388         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1389  err_out_child_node:
1390         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1391                 brelse(path[level].bp_sib_bh);
1392                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1393         }
1394         *levelp = level;
1395         stats->bs_nblocks = 0;
1396         return ret;
1397 }
1398
1399 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1400                                       struct nilfs_btree_path *path,
1401                                       int maxlevel, struct inode *dat)
1402 {
1403         int level;
1404
1405         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1406                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1407                 path[level].bp_op(btree, path, level, NULL, NULL);
1408         }
1409
1410         if (!nilfs_bmap_dirty(btree))
1411                 nilfs_bmap_set_dirty(btree);
1412 }
1413
1414 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1415
1416 {
1417         struct nilfs_btree_path *path;
1418         struct nilfs_bmap_stats stats;
1419         struct inode *dat;
1420         int level, ret;
1421
1422         path = nilfs_btree_alloc_path();
1423         if (path == NULL)
1424                 return -ENOMEM;
1425
1426         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1427                                     NILFS_BTREE_LEVEL_NODE_MIN);
1428         if (ret < 0)
1429                 goto out;
1430
1431
1432         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1433
1434         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1435         if (ret < 0)
1436                 goto out;
1437         nilfs_btree_commit_delete(btree, path, level, dat);
1438         nilfs_bmap_sub_blocks(btree, stats.bs_nblocks);
1439
1440 out:
1441         nilfs_btree_free_path(path);
1442         return ret;
1443 }
1444
1445 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1446 {
1447         struct nilfs_btree_path *path;
1448         int ret;
1449
1450         path = nilfs_btree_alloc_path();
1451         if (path == NULL)
1452                 return -ENOMEM;
1453
1454         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1455
1456         nilfs_btree_free_path(path);
1457
1458         return ret;
1459 }
1460
1461 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1462 {
1463         struct buffer_head *bh;
1464         struct nilfs_btree_node *root, *node;
1465         __u64 maxkey, nextmaxkey;
1466         __u64 ptr;
1467         int nchildren, ret;
1468
1469         root = nilfs_btree_get_root(btree);
1470         switch (nilfs_btree_height(btree)) {
1471         case 2:
1472                 bh = NULL;
1473                 node = root;
1474                 break;
1475         case 3:
1476                 nchildren = nilfs_btree_node_get_nchildren(root);
1477                 if (nchildren > 1)
1478                         return 0;
1479                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1480                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1481                 if (ret < 0)
1482                         return ret;
1483                 node = (struct nilfs_btree_node *)bh->b_data;
1484                 break;
1485         default:
1486                 return 0;
1487         }
1488
1489         nchildren = nilfs_btree_node_get_nchildren(node);
1490         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1491         nextmaxkey = (nchildren > 1) ?
1492                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1493         if (bh != NULL)
1494                 brelse(bh);
1495
1496         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1497 }
1498
1499 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1500                                    __u64 *keys, __u64 *ptrs, int nitems)
1501 {
1502         struct buffer_head *bh;
1503         struct nilfs_btree_node *node, *root;
1504         __le64 *dkeys;
1505         __le64 *dptrs;
1506         __u64 ptr;
1507         int nchildren, i, ret;
1508
1509         root = nilfs_btree_get_root(btree);
1510         switch (nilfs_btree_height(btree)) {
1511         case 2:
1512                 bh = NULL;
1513                 node = root;
1514                 break;
1515         case 3:
1516                 nchildren = nilfs_btree_node_get_nchildren(root);
1517                 WARN_ON(nchildren > 1);
1518                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1519                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1520                 if (ret < 0)
1521                         return ret;
1522                 node = (struct nilfs_btree_node *)bh->b_data;
1523                 break;
1524         default:
1525                 node = NULL;
1526                 return -EINVAL;
1527         }
1528
1529         nchildren = nilfs_btree_node_get_nchildren(node);
1530         if (nchildren < nitems)
1531                 nitems = nchildren;
1532         dkeys = nilfs_btree_node_dkeys(node);
1533         dptrs = nilfs_btree_node_dptrs(node, btree);
1534         for (i = 0; i < nitems; i++) {
1535                 keys[i] = le64_to_cpu(dkeys[i]);
1536                 ptrs[i] = le64_to_cpu(dptrs[i]);
1537         }
1538
1539         if (bh != NULL)
1540                 brelse(bh);
1541
1542         return nitems;
1543 }
1544
1545 static int
1546 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1547                                        union nilfs_bmap_ptr_req *dreq,
1548                                        union nilfs_bmap_ptr_req *nreq,
1549                                        struct buffer_head **bhp,
1550                                        struct nilfs_bmap_stats *stats)
1551 {
1552         struct buffer_head *bh;
1553         struct inode *dat = NULL;
1554         int ret;
1555
1556         stats->bs_nblocks = 0;
1557
1558         /* for data */
1559         /* cannot find near ptr */
1560         if (NILFS_BMAP_USE_VBN(btree)) {
1561                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1562                 dat = nilfs_bmap_get_dat(btree);
1563         }
1564
1565         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1566         if (ret < 0)
1567                 return ret;
1568
1569         *bhp = NULL;
1570         stats->bs_nblocks++;
1571         if (nreq != NULL) {
1572                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1573                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1574                 if (ret < 0)
1575                         goto err_out_dreq;
1576
1577                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1578                 if (ret < 0)
1579                         goto err_out_nreq;
1580
1581                 *bhp = bh;
1582                 stats->bs_nblocks++;
1583         }
1584
1585         /* success */
1586         return 0;
1587
1588         /* error */
1589  err_out_nreq:
1590         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1591  err_out_dreq:
1592         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1593         stats->bs_nblocks = 0;
1594         return ret;
1595
1596 }
1597
1598 static void
1599 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1600                                       __u64 key, __u64 ptr,
1601                                       const __u64 *keys, const __u64 *ptrs,
1602                                       int n,
1603                                       union nilfs_bmap_ptr_req *dreq,
1604                                       union nilfs_bmap_ptr_req *nreq,
1605                                       struct buffer_head *bh)
1606 {
1607         struct nilfs_btree_node *node;
1608         struct inode *dat;
1609         __u64 tmpptr;
1610
1611         /* free resources */
1612         if (btree->b_ops->bop_clear != NULL)
1613                 btree->b_ops->bop_clear(btree);
1614
1615         /* ptr must be a pointer to a buffer head. */
1616         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1617
1618         /* convert and insert */
1619         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1620         nilfs_btree_init(btree);
1621         if (nreq != NULL) {
1622                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1623                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1624
1625                 /* create child node at level 1 */
1626                 node = (struct nilfs_btree_node *)bh->b_data;
1627                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1628                 nilfs_btree_node_insert(btree, node, key, dreq->bpr_ptr, n);
1629                 if (!buffer_dirty(bh))
1630                         nilfs_btnode_mark_dirty(bh);
1631                 if (!nilfs_bmap_dirty(btree))
1632                         nilfs_bmap_set_dirty(btree);
1633
1634                 brelse(bh);
1635
1636                 /* create root node at level 2 */
1637                 node = nilfs_btree_get_root(btree);
1638                 tmpptr = nreq->bpr_ptr;
1639                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1640                                       2, 1, &keys[0], &tmpptr);
1641         } else {
1642                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1643
1644                 /* create root node at level 1 */
1645                 node = nilfs_btree_get_root(btree);
1646                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1647                                       1, n, keys, ptrs);
1648                 nilfs_btree_node_insert(btree, node, key, dreq->bpr_ptr, n);
1649                 if (!nilfs_bmap_dirty(btree))
1650                         nilfs_bmap_set_dirty(btree);
1651         }
1652
1653         if (NILFS_BMAP_USE_VBN(btree))
1654                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1655 }
1656
1657 /**
1658  * nilfs_btree_convert_and_insert -
1659  * @bmap:
1660  * @key:
1661  * @ptr:
1662  * @keys:
1663  * @ptrs:
1664  * @n:
1665  */
1666 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1667                                    __u64 key, __u64 ptr,
1668                                    const __u64 *keys, const __u64 *ptrs, int n)
1669 {
1670         struct buffer_head *bh;
1671         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1672         struct nilfs_bmap_stats stats;
1673         int ret;
1674
1675         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1676                 di = &dreq;
1677                 ni = NULL;
1678         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1679                            1 << btree->b_inode->i_blkbits)) {
1680                 di = &dreq;
1681                 ni = &nreq;
1682         } else {
1683                 di = NULL;
1684                 ni = NULL;
1685                 BUG();
1686         }
1687
1688         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1689                                                      &stats);
1690         if (ret < 0)
1691                 return ret;
1692         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1693                                               di, ni, bh);
1694         nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
1695         return 0;
1696 }
1697
1698 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1699                                    struct nilfs_btree_path *path,
1700                                    int level,
1701                                    struct buffer_head *bh)
1702 {
1703         while ((++level < nilfs_btree_height(btree) - 1) &&
1704                !buffer_dirty(path[level].bp_bh))
1705                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1706
1707         return 0;
1708 }
1709
1710 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1711                                         struct nilfs_btree_path *path,
1712                                         int level, struct inode *dat)
1713 {
1714         struct nilfs_btree_node *parent;
1715         int ret;
1716
1717         parent = nilfs_btree_get_node(btree, path, level + 1);
1718         path[level].bp_oldreq.bpr_ptr =
1719                 nilfs_btree_node_get_ptr(btree, parent,
1720                                          path[level + 1].bp_index);
1721         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1722         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1723                                        &path[level].bp_newreq.bpr_req);
1724         if (ret < 0)
1725                 return ret;
1726
1727         if (buffer_nilfs_node(path[level].bp_bh)) {
1728                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1729                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1730                 path[level].bp_ctxt.bh = path[level].bp_bh;
1731                 ret = nilfs_btnode_prepare_change_key(
1732                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1733                         &path[level].bp_ctxt);
1734                 if (ret < 0) {
1735                         nilfs_dat_abort_update(dat,
1736                                                &path[level].bp_oldreq.bpr_req,
1737                                                &path[level].bp_newreq.bpr_req);
1738                         return ret;
1739                 }
1740         }
1741
1742         return 0;
1743 }
1744
1745 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1746                                         struct nilfs_btree_path *path,
1747                                         int level, struct inode *dat)
1748 {
1749         struct nilfs_btree_node *parent;
1750
1751         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1752                                 &path[level].bp_newreq.bpr_req,
1753                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1754
1755         if (buffer_nilfs_node(path[level].bp_bh)) {
1756                 nilfs_btnode_commit_change_key(
1757                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1758                         &path[level].bp_ctxt);
1759                 path[level].bp_bh = path[level].bp_ctxt.bh;
1760         }
1761         set_buffer_nilfs_volatile(path[level].bp_bh);
1762
1763         parent = nilfs_btree_get_node(btree, path, level + 1);
1764         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1765                                  path[level].bp_newreq.bpr_ptr);
1766 }
1767
1768 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1769                                        struct nilfs_btree_path *path,
1770                                        int level, struct inode *dat)
1771 {
1772         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1773                                &path[level].bp_newreq.bpr_req);
1774         if (buffer_nilfs_node(path[level].bp_bh))
1775                 nilfs_btnode_abort_change_key(
1776                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1777                         &path[level].bp_ctxt);
1778 }
1779
1780 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1781                                            struct nilfs_btree_path *path,
1782                                            int minlevel, int *maxlevelp,
1783                                            struct inode *dat)
1784 {
1785         int level, ret;
1786
1787         level = minlevel;
1788         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1789                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1790                 if (ret < 0)
1791                         return ret;
1792         }
1793         while ((++level < nilfs_btree_height(btree) - 1) &&
1794                !buffer_dirty(path[level].bp_bh)) {
1795
1796                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1797                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1798                 if (ret < 0)
1799                         goto out;
1800         }
1801
1802         /* success */
1803         *maxlevelp = level - 1;
1804         return 0;
1805
1806         /* error */
1807  out:
1808         while (--level > minlevel)
1809                 nilfs_btree_abort_update_v(btree, path, level, dat);
1810         if (!buffer_nilfs_volatile(path[level].bp_bh))
1811                 nilfs_btree_abort_update_v(btree, path, level, dat);
1812         return ret;
1813 }
1814
1815 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1816                                            struct nilfs_btree_path *path,
1817                                            int minlevel, int maxlevel,
1818                                            struct buffer_head *bh,
1819                                            struct inode *dat)
1820 {
1821         int level;
1822
1823         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1824                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1825
1826         for (level = minlevel + 1; level <= maxlevel; level++)
1827                 nilfs_btree_commit_update_v(btree, path, level, dat);
1828 }
1829
1830 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
1831                                    struct nilfs_btree_path *path,
1832                                    int level, struct buffer_head *bh)
1833 {
1834         int maxlevel = 0, ret;
1835         struct nilfs_btree_node *parent;
1836         struct inode *dat = nilfs_bmap_get_dat(btree);
1837         __u64 ptr;
1838
1839         get_bh(bh);
1840         path[level].bp_bh = bh;
1841         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1842                                               dat);
1843         if (ret < 0)
1844                 goto out;
1845
1846         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1847                 parent = nilfs_btree_get_node(btree, path, level + 1);
1848                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1849                                                path[level + 1].bp_index);
1850                 ret = nilfs_dat_mark_dirty(dat, ptr);
1851                 if (ret < 0)
1852                         goto out;
1853         }
1854
1855         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1856
1857  out:
1858         brelse(path[level].bp_bh);
1859         path[level].bp_bh = NULL;
1860         return ret;
1861 }
1862
1863 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
1864                                  struct buffer_head *bh)
1865 {
1866         struct nilfs_btree_path *path;
1867         struct nilfs_btree_node *node;
1868         __u64 key;
1869         int level, ret;
1870
1871         WARN_ON(!buffer_dirty(bh));
1872
1873         path = nilfs_btree_alloc_path();
1874         if (path == NULL)
1875                 return -ENOMEM;
1876
1877         if (buffer_nilfs_node(bh)) {
1878                 node = (struct nilfs_btree_node *)bh->b_data;
1879                 key = nilfs_btree_node_get_key(node, 0);
1880                 level = nilfs_btree_node_get_level(node);
1881         } else {
1882                 key = nilfs_bmap_data_get_key(btree, bh);
1883                 level = NILFS_BTREE_LEVEL_DATA;
1884         }
1885
1886         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1887         if (ret < 0) {
1888                 if (unlikely(ret == -ENOENT))
1889                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1890                                __func__, (unsigned long long)key, level);
1891                 goto out;
1892         }
1893
1894         ret = NILFS_BMAP_USE_VBN(btree) ?
1895                 nilfs_btree_propagate_v(btree, path, level, bh) :
1896                 nilfs_btree_propagate_p(btree, path, level, bh);
1897
1898  out:
1899         nilfs_btree_free_path(path);
1900
1901         return ret;
1902 }
1903
1904 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
1905                                     struct buffer_head *bh)
1906 {
1907         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
1908 }
1909
1910 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
1911                                          struct list_head *lists,
1912                                          struct buffer_head *bh)
1913 {
1914         struct list_head *head;
1915         struct buffer_head *cbh;
1916         struct nilfs_btree_node *node, *cnode;
1917         __u64 key, ckey;
1918         int level;
1919
1920         get_bh(bh);
1921         node = (struct nilfs_btree_node *)bh->b_data;
1922         key = nilfs_btree_node_get_key(node, 0);
1923         level = nilfs_btree_node_get_level(node);
1924         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
1925             level >= NILFS_BTREE_LEVEL_MAX) {
1926                 dump_stack();
1927                 printk(KERN_WARNING
1928                        "%s: invalid btree level: %d (key=%llu, ino=%lu, "
1929                        "blocknr=%llu)\n",
1930                        __func__, level, (unsigned long long)key,
1931                        NILFS_BMAP_I(btree)->vfs_inode.i_ino,
1932                        (unsigned long long)bh->b_blocknr);
1933                 return;
1934         }
1935
1936         list_for_each(head, &lists[level]) {
1937                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1938                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1939                 ckey = nilfs_btree_node_get_key(cnode, 0);
1940                 if (key < ckey)
1941                         break;
1942         }
1943         list_add_tail(&bh->b_assoc_buffers, head);
1944 }
1945
1946 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
1947                                              struct list_head *listp)
1948 {
1949         struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
1950         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1951         struct pagevec pvec;
1952         struct buffer_head *bh, *head;
1953         pgoff_t index = 0;
1954         int level, i;
1955
1956         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1957              level < NILFS_BTREE_LEVEL_MAX;
1958              level++)
1959                 INIT_LIST_HEAD(&lists[level]);
1960
1961         pagevec_init(&pvec, 0);
1962
1963         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1964                                   PAGEVEC_SIZE)) {
1965                 for (i = 0; i < pagevec_count(&pvec); i++) {
1966                         bh = head = page_buffers(pvec.pages[i]);
1967                         do {
1968                                 if (buffer_dirty(bh))
1969                                         nilfs_btree_add_dirty_buffer(btree,
1970                                                                      lists, bh);
1971                         } while ((bh = bh->b_this_page) != head);
1972                 }
1973                 pagevec_release(&pvec);
1974                 cond_resched();
1975         }
1976
1977         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1978              level < NILFS_BTREE_LEVEL_MAX;
1979              level++)
1980                 list_splice_tail(&lists[level], listp);
1981 }
1982
1983 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
1984                                 struct nilfs_btree_path *path,
1985                                 int level,
1986                                 struct buffer_head **bh,
1987                                 sector_t blocknr,
1988                                 union nilfs_binfo *binfo)
1989 {
1990         struct nilfs_btree_node *parent;
1991         __u64 key;
1992         __u64 ptr;
1993         int ret;
1994
1995         parent = nilfs_btree_get_node(btree, path, level + 1);
1996         ptr = nilfs_btree_node_get_ptr(btree, parent,
1997                                        path[level + 1].bp_index);
1998         if (buffer_nilfs_node(*bh)) {
1999                 path[level].bp_ctxt.oldkey = ptr;
2000                 path[level].bp_ctxt.newkey = blocknr;
2001                 path[level].bp_ctxt.bh = *bh;
2002                 ret = nilfs_btnode_prepare_change_key(
2003                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2004                         &path[level].bp_ctxt);
2005                 if (ret < 0)
2006                         return ret;
2007                 nilfs_btnode_commit_change_key(
2008                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2009                         &path[level].bp_ctxt);
2010                 *bh = path[level].bp_ctxt.bh;
2011         }
2012
2013         nilfs_btree_node_set_ptr(btree, parent,
2014                                  path[level + 1].bp_index, blocknr);
2015
2016         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2017         /* on-disk format */
2018         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2019         binfo->bi_dat.bi_level = level;
2020
2021         return 0;
2022 }
2023
2024 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2025                                 struct nilfs_btree_path *path,
2026                                 int level,
2027                                 struct buffer_head **bh,
2028                                 sector_t blocknr,
2029                                 union nilfs_binfo *binfo)
2030 {
2031         struct nilfs_btree_node *parent;
2032         struct inode *dat = nilfs_bmap_get_dat(btree);
2033         __u64 key;
2034         __u64 ptr;
2035         union nilfs_bmap_ptr_req req;
2036         int ret;
2037
2038         parent = nilfs_btree_get_node(btree, path, level + 1);
2039         ptr = nilfs_btree_node_get_ptr(btree, parent, path[level + 1].bp_index);
2040         req.bpr_ptr = ptr;
2041         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2042         if (ret < 0)
2043                 return ret;
2044         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2045
2046         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2047         /* on-disk format */
2048         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2049         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2050
2051         return 0;
2052 }
2053
2054 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2055                               struct buffer_head **bh,
2056                               sector_t blocknr,
2057                               union nilfs_binfo *binfo)
2058 {
2059         struct nilfs_btree_path *path;
2060         struct nilfs_btree_node *node;
2061         __u64 key;
2062         int level, ret;
2063
2064         path = nilfs_btree_alloc_path();
2065         if (path == NULL)
2066                 return -ENOMEM;
2067
2068         if (buffer_nilfs_node(*bh)) {
2069                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2070                 key = nilfs_btree_node_get_key(node, 0);
2071                 level = nilfs_btree_node_get_level(node);
2072         } else {
2073                 key = nilfs_bmap_data_get_key(btree, *bh);
2074                 level = NILFS_BTREE_LEVEL_DATA;
2075         }
2076
2077         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2078         if (ret < 0) {
2079                 WARN_ON(ret == -ENOENT);
2080                 goto out;
2081         }
2082
2083         ret = NILFS_BMAP_USE_VBN(btree) ?
2084                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2085                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2086
2087  out:
2088         nilfs_btree_free_path(path);
2089
2090         return ret;
2091 }
2092
2093 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2094                                  struct buffer_head **bh,
2095                                  sector_t blocknr,
2096                                  union nilfs_binfo *binfo)
2097 {
2098         struct nilfs_btree_node *node;
2099         __u64 key;
2100         int ret;
2101
2102         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2103                              blocknr);
2104         if (ret < 0)
2105                 return ret;
2106
2107         if (buffer_nilfs_node(*bh)) {
2108                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2109                 key = nilfs_btree_node_get_key(node, 0);
2110         } else
2111                 key = nilfs_bmap_data_get_key(btree, *bh);
2112
2113         /* on-disk format */
2114         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2115         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2116
2117         return 0;
2118 }
2119
2120 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2121 {
2122         struct buffer_head *bh;
2123         struct nilfs_btree_path *path;
2124         __u64 ptr;
2125         int ret;
2126
2127         path = nilfs_btree_alloc_path();
2128         if (path == NULL)
2129                 return -ENOMEM;
2130
2131         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2132         if (ret < 0) {
2133                 WARN_ON(ret == -ENOENT);
2134                 goto out;
2135         }
2136         ret = nilfs_btree_get_block(btree, ptr, &bh);
2137         if (ret < 0) {
2138                 WARN_ON(ret == -ENOENT);
2139                 goto out;
2140         }
2141
2142         if (!buffer_dirty(bh))
2143                 nilfs_btnode_mark_dirty(bh);
2144         brelse(bh);
2145         if (!nilfs_bmap_dirty(btree))
2146                 nilfs_bmap_set_dirty(btree);
2147
2148  out:
2149         nilfs_btree_free_path(path);
2150         return ret;
2151 }
2152
2153 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2154         .bop_lookup             =       nilfs_btree_lookup,
2155         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2156         .bop_insert             =       nilfs_btree_insert,
2157         .bop_delete             =       nilfs_btree_delete,
2158         .bop_clear              =       NULL,
2159
2160         .bop_propagate          =       nilfs_btree_propagate,
2161
2162         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2163
2164         .bop_assign             =       nilfs_btree_assign,
2165         .bop_mark               =       nilfs_btree_mark,
2166
2167         .bop_last_key           =       nilfs_btree_last_key,
2168         .bop_check_insert       =       NULL,
2169         .bop_check_delete       =       nilfs_btree_check_delete,
2170         .bop_gather_data        =       nilfs_btree_gather_data,
2171 };
2172
2173 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2174         .bop_lookup             =       NULL,
2175         .bop_lookup_contig      =       NULL,
2176         .bop_insert             =       NULL,
2177         .bop_delete             =       NULL,
2178         .bop_clear              =       NULL,
2179
2180         .bop_propagate          =       nilfs_btree_propagate_gc,
2181
2182         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2183
2184         .bop_assign             =       nilfs_btree_assign_gc,
2185         .bop_mark               =       NULL,
2186
2187         .bop_last_key           =       NULL,
2188         .bop_check_insert       =       NULL,
2189         .bop_check_delete       =       NULL,
2190         .bop_gather_data        =       NULL,
2191 };
2192
2193 int nilfs_btree_init(struct nilfs_bmap *bmap)
2194 {
2195         bmap->b_ops = &nilfs_btree_ops;
2196         return 0;
2197 }
2198
2199 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2200 {
2201         bmap->b_ops = &nilfs_btree_ops_gc;
2202 }