Merge git://www.linux-watchdog.org/linux-watchdog
[cascardo/linux.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * Written by Koji Sato.
17  */
18
19 #include <linux/slab.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/pagevec.h>
23 #include "nilfs.h"
24 #include "page.h"
25 #include "btnode.h"
26 #include "btree.h"
27 #include "alloc.h"
28 #include "dat.h"
29
30 static void __nilfs_btree_init(struct nilfs_bmap *bmap);
31
32 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
33 {
34         struct nilfs_btree_path *path;
35         int level = NILFS_BTREE_LEVEL_DATA;
36
37         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
38         if (path == NULL)
39                 goto out;
40
41         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
42                 path[level].bp_bh = NULL;
43                 path[level].bp_sib_bh = NULL;
44                 path[level].bp_index = 0;
45                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
46                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
47                 path[level].bp_op = NULL;
48         }
49
50 out:
51         return path;
52 }
53
54 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
55 {
56         int level = NILFS_BTREE_LEVEL_DATA;
57
58         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
59                 brelse(path[level].bp_bh);
60
61         kmem_cache_free(nilfs_btree_path_cache, path);
62 }
63
64 /*
65  * B-tree node operations
66  */
67 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
68                                      __u64 ptr, struct buffer_head **bhp)
69 {
70         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
71         struct buffer_head *bh;
72
73         bh = nilfs_btnode_create_block(btnc, ptr);
74         if (!bh)
75                 return -ENOMEM;
76
77         set_buffer_nilfs_volatile(bh);
78         *bhp = bh;
79         return 0;
80 }
81
82 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
83 {
84         return node->bn_flags;
85 }
86
87 static void
88 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
89 {
90         node->bn_flags = flags;
91 }
92
93 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
94 {
95         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
96 }
97
98 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
99 {
100         return node->bn_level;
101 }
102
103 static void
104 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
105 {
106         node->bn_level = level;
107 }
108
109 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
110 {
111         return le16_to_cpu(node->bn_nchildren);
112 }
113
114 static void
115 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
116 {
117         node->bn_nchildren = cpu_to_le16(nchildren);
118 }
119
120 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
121 {
122         return 1 << btree->b_inode->i_blkbits;
123 }
124
125 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
126 {
127         return btree->b_nchildren_per_block;
128 }
129
130 static __le64 *
131 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
132 {
133         return (__le64 *)((char *)(node + 1) +
134                           (nilfs_btree_node_root(node) ?
135                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
136 }
137
138 static __le64 *
139 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
140 {
141         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
142 }
143
144 static __u64
145 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
146 {
147         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
148 }
149
150 static void
151 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
152 {
153         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
154 }
155
156 static __u64
157 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
158                          int ncmax)
159 {
160         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
161 }
162
163 static void
164 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
165                          int ncmax)
166 {
167         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
168 }
169
170 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
171                                   int level, int nchildren, int ncmax,
172                                   const __u64 *keys, const __u64 *ptrs)
173 {
174         __le64 *dkeys;
175         __le64 *dptrs;
176         int i;
177
178         nilfs_btree_node_set_flags(node, flags);
179         nilfs_btree_node_set_level(node, level);
180         nilfs_btree_node_set_nchildren(node, nchildren);
181
182         dkeys = nilfs_btree_node_dkeys(node);
183         dptrs = nilfs_btree_node_dptrs(node, ncmax);
184         for (i = 0; i < nchildren; i++) {
185                 dkeys[i] = cpu_to_le64(keys[i]);
186                 dptrs[i] = cpu_to_le64(ptrs[i]);
187         }
188 }
189
190 /* Assume the buffer heads corresponding to left and right are locked. */
191 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
192                                        struct nilfs_btree_node *right,
193                                        int n, int lncmax, int rncmax)
194 {
195         __le64 *ldkeys, *rdkeys;
196         __le64 *ldptrs, *rdptrs;
197         int lnchildren, rnchildren;
198
199         ldkeys = nilfs_btree_node_dkeys(left);
200         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
201         lnchildren = nilfs_btree_node_get_nchildren(left);
202
203         rdkeys = nilfs_btree_node_dkeys(right);
204         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
205         rnchildren = nilfs_btree_node_get_nchildren(right);
206
207         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
208         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
209         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
210         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
211
212         lnchildren += n;
213         rnchildren -= n;
214         nilfs_btree_node_set_nchildren(left, lnchildren);
215         nilfs_btree_node_set_nchildren(right, rnchildren);
216 }
217
218 /* Assume that the buffer heads corresponding to left and right are locked. */
219 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
220                                         struct nilfs_btree_node *right,
221                                         int n, int lncmax, int rncmax)
222 {
223         __le64 *ldkeys, *rdkeys;
224         __le64 *ldptrs, *rdptrs;
225         int lnchildren, rnchildren;
226
227         ldkeys = nilfs_btree_node_dkeys(left);
228         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
229         lnchildren = nilfs_btree_node_get_nchildren(left);
230
231         rdkeys = nilfs_btree_node_dkeys(right);
232         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
233         rnchildren = nilfs_btree_node_get_nchildren(right);
234
235         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
236         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
237         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
238         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
239
240         lnchildren -= n;
241         rnchildren += n;
242         nilfs_btree_node_set_nchildren(left, lnchildren);
243         nilfs_btree_node_set_nchildren(right, rnchildren);
244 }
245
246 /* Assume that the buffer head corresponding to node is locked. */
247 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
248                                     __u64 key, __u64 ptr, int ncmax)
249 {
250         __le64 *dkeys;
251         __le64 *dptrs;
252         int nchildren;
253
254         dkeys = nilfs_btree_node_dkeys(node);
255         dptrs = nilfs_btree_node_dptrs(node, ncmax);
256         nchildren = nilfs_btree_node_get_nchildren(node);
257         if (index < nchildren) {
258                 memmove(dkeys + index + 1, dkeys + index,
259                         (nchildren - index) * sizeof(*dkeys));
260                 memmove(dptrs + index + 1, dptrs + index,
261                         (nchildren - index) * sizeof(*dptrs));
262         }
263         dkeys[index] = cpu_to_le64(key);
264         dptrs[index] = cpu_to_le64(ptr);
265         nchildren++;
266         nilfs_btree_node_set_nchildren(node, nchildren);
267 }
268
269 /* Assume that the buffer head corresponding to node is locked. */
270 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
271                                     __u64 *keyp, __u64 *ptrp, int ncmax)
272 {
273         __u64 key;
274         __u64 ptr;
275         __le64 *dkeys;
276         __le64 *dptrs;
277         int nchildren;
278
279         dkeys = nilfs_btree_node_dkeys(node);
280         dptrs = nilfs_btree_node_dptrs(node, ncmax);
281         key = le64_to_cpu(dkeys[index]);
282         ptr = le64_to_cpu(dptrs[index]);
283         nchildren = nilfs_btree_node_get_nchildren(node);
284         if (keyp != NULL)
285                 *keyp = key;
286         if (ptrp != NULL)
287                 *ptrp = ptr;
288
289         if (index < nchildren - 1) {
290                 memmove(dkeys + index, dkeys + index + 1,
291                         (nchildren - index - 1) * sizeof(*dkeys));
292                 memmove(dptrs + index, dptrs + index + 1,
293                         (nchildren - index - 1) * sizeof(*dptrs));
294         }
295         nchildren--;
296         nilfs_btree_node_set_nchildren(node, nchildren);
297 }
298
299 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
300                                    __u64 key, int *indexp)
301 {
302         __u64 nkey;
303         int index, low, high, s;
304
305         /* binary search */
306         low = 0;
307         high = nilfs_btree_node_get_nchildren(node) - 1;
308         index = 0;
309         s = 0;
310         while (low <= high) {
311                 index = (low + high) / 2;
312                 nkey = nilfs_btree_node_get_key(node, index);
313                 if (nkey == key) {
314                         s = 0;
315                         goto out;
316                 } else if (nkey < key) {
317                         low = index + 1;
318                         s = -1;
319                 } else {
320                         high = index - 1;
321                         s = 1;
322                 }
323         }
324
325         /* adjust index */
326         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
327                 if (s > 0 && index > 0)
328                         index--;
329         } else if (s < 0)
330                 index++;
331
332  out:
333         *indexp = index;
334
335         return s == 0;
336 }
337
338 /**
339  * nilfs_btree_node_broken - verify consistency of btree node
340  * @node: btree node block to be examined
341  * @size: node size (in bytes)
342  * @blocknr: block number
343  *
344  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
345  */
346 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
347                                    size_t size, sector_t blocknr)
348 {
349         int level, flags, nchildren;
350         int ret = 0;
351
352         level = nilfs_btree_node_get_level(node);
353         flags = nilfs_btree_node_get_flags(node);
354         nchildren = nilfs_btree_node_get_nchildren(node);
355
356         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
357                      level >= NILFS_BTREE_LEVEL_MAX ||
358                      (flags & NILFS_BTREE_NODE_ROOT) ||
359                      nchildren < 0 ||
360                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
361                 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
362                        "level = %d, flags = 0x%x, nchildren = %d\n",
363                        (unsigned long long)blocknr, level, flags, nchildren);
364                 ret = 1;
365         }
366         return ret;
367 }
368
369 /**
370  * nilfs_btree_root_broken - verify consistency of btree root node
371  * @node: btree root node to be examined
372  * @ino: inode number
373  *
374  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
375  */
376 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
377                                    unsigned long ino)
378 {
379         int level, flags, nchildren;
380         int ret = 0;
381
382         level = nilfs_btree_node_get_level(node);
383         flags = nilfs_btree_node_get_flags(node);
384         nchildren = nilfs_btree_node_get_nchildren(node);
385
386         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
387                      level >= NILFS_BTREE_LEVEL_MAX ||
388                      nchildren < 0 ||
389                      nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
390                 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
391                         ino, level, flags, nchildren);
392                 ret = 1;
393         }
394         return ret;
395 }
396
397 int nilfs_btree_broken_node_block(struct buffer_head *bh)
398 {
399         int ret;
400
401         if (buffer_nilfs_checked(bh))
402                 return 0;
403
404         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
405                                        bh->b_size, bh->b_blocknr);
406         if (likely(!ret))
407                 set_buffer_nilfs_checked(bh);
408         return ret;
409 }
410
411 static struct nilfs_btree_node *
412 nilfs_btree_get_root(const struct nilfs_bmap *btree)
413 {
414         return (struct nilfs_btree_node *)btree->b_u.u_data;
415 }
416
417 static struct nilfs_btree_node *
418 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
419 {
420         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
421 }
422
423 static struct nilfs_btree_node *
424 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
425 {
426         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
427 }
428
429 static int nilfs_btree_height(const struct nilfs_bmap *btree)
430 {
431         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
432 }
433
434 static struct nilfs_btree_node *
435 nilfs_btree_get_node(const struct nilfs_bmap *btree,
436                      const struct nilfs_btree_path *path,
437                      int level, int *ncmaxp)
438 {
439         struct nilfs_btree_node *node;
440
441         if (level == nilfs_btree_height(btree) - 1) {
442                 node = nilfs_btree_get_root(btree);
443                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
444         } else {
445                 node = nilfs_btree_get_nonroot_node(path, level);
446                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
447         }
448         return node;
449 }
450
451 static int
452 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
453 {
454         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
455                 dump_stack();
456                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
457                        nilfs_btree_node_get_level(node), level);
458                 return 1;
459         }
460         return 0;
461 }
462
463 struct nilfs_btree_readahead_info {
464         struct nilfs_btree_node *node;  /* parent node */
465         int max_ra_blocks;              /* max nof blocks to read ahead */
466         int index;                      /* current index on the parent node */
467         int ncmax;                      /* nof children in the parent node */
468 };
469
470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
471                                    struct buffer_head **bhp,
472                                    const struct nilfs_btree_readahead_info *ra)
473 {
474         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
475         struct buffer_head *bh, *ra_bh;
476         sector_t submit_ptr = 0;
477         int ret;
478
479         ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh,
480                                         &submit_ptr);
481         if (ret) {
482                 if (ret != -EEXIST)
483                         return ret;
484                 goto out_check;
485         }
486
487         if (ra) {
488                 int i, n;
489                 __u64 ptr2;
490
491                 /* read ahead sibling nodes */
492                 for (n = ra->max_ra_blocks, i = ra->index + 1;
493                      n > 0 && i < ra->ncmax; n--, i++) {
494                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
495
496                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0,
497                                                         REQ_OP_READ, REQ_RAHEAD,
498                                                         &ra_bh, &submit_ptr);
499                         if (likely(!ret || ret == -EEXIST))
500                                 brelse(ra_bh);
501                         else if (ret != -EBUSY)
502                                 break;
503                         if (!buffer_locked(bh))
504                                 goto out_no_wait;
505                 }
506         }
507
508         wait_on_buffer(bh);
509
510  out_no_wait:
511         if (!buffer_uptodate(bh)) {
512                 brelse(bh);
513                 return -EIO;
514         }
515
516  out_check:
517         if (nilfs_btree_broken_node_block(bh)) {
518                 clear_buffer_uptodate(bh);
519                 brelse(bh);
520                 return -EINVAL;
521         }
522
523         *bhp = bh;
524         return 0;
525 }
526
527 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
528                                    struct buffer_head **bhp)
529 {
530         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
531 }
532
533 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
534                                  struct nilfs_btree_path *path,
535                                  __u64 key, __u64 *ptrp, int minlevel,
536                                  int readahead)
537 {
538         struct nilfs_btree_node *node;
539         struct nilfs_btree_readahead_info p, *ra;
540         __u64 ptr;
541         int level, index, found, ncmax, ret;
542
543         node = nilfs_btree_get_root(btree);
544         level = nilfs_btree_node_get_level(node);
545         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
546                 return -ENOENT;
547
548         found = nilfs_btree_node_lookup(node, key, &index);
549         ptr = nilfs_btree_node_get_ptr(node, index,
550                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
551         path[level].bp_bh = NULL;
552         path[level].bp_index = index;
553
554         ncmax = nilfs_btree_nchildren_per_block(btree);
555
556         while (--level >= minlevel) {
557                 ra = NULL;
558                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
559                         p.node = nilfs_btree_get_node(btree, path, level + 1,
560                                                       &p.ncmax);
561                         p.index = index;
562                         p.max_ra_blocks = 7;
563                         ra = &p;
564                 }
565                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
566                                               ra);
567                 if (ret < 0)
568                         return ret;
569
570                 node = nilfs_btree_get_nonroot_node(path, level);
571                 if (nilfs_btree_bad_node(node, level))
572                         return -EINVAL;
573                 if (!found)
574                         found = nilfs_btree_node_lookup(node, key, &index);
575                 else
576                         index = 0;
577                 if (index < ncmax) {
578                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
579                 } else {
580                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
581                         /* insert */
582                         ptr = NILFS_BMAP_INVALID_PTR;
583                 }
584                 path[level].bp_index = index;
585         }
586         if (!found)
587                 return -ENOENT;
588
589         if (ptrp != NULL)
590                 *ptrp = ptr;
591
592         return 0;
593 }
594
595 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
596                                       struct nilfs_btree_path *path,
597                                       __u64 *keyp, __u64 *ptrp)
598 {
599         struct nilfs_btree_node *node;
600         __u64 ptr;
601         int index, level, ncmax, ret;
602
603         node = nilfs_btree_get_root(btree);
604         index = nilfs_btree_node_get_nchildren(node) - 1;
605         if (index < 0)
606                 return -ENOENT;
607         level = nilfs_btree_node_get_level(node);
608         ptr = nilfs_btree_node_get_ptr(node, index,
609                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
610         path[level].bp_bh = NULL;
611         path[level].bp_index = index;
612         ncmax = nilfs_btree_nchildren_per_block(btree);
613
614         for (level--; level > 0; level--) {
615                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
616                 if (ret < 0)
617                         return ret;
618                 node = nilfs_btree_get_nonroot_node(path, level);
619                 if (nilfs_btree_bad_node(node, level))
620                         return -EINVAL;
621                 index = nilfs_btree_node_get_nchildren(node) - 1;
622                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
623                 path[level].bp_index = index;
624         }
625
626         if (keyp != NULL)
627                 *keyp = nilfs_btree_node_get_key(node, index);
628         if (ptrp != NULL)
629                 *ptrp = ptr;
630
631         return 0;
632 }
633
634 /**
635  * nilfs_btree_get_next_key - get next valid key from btree path array
636  * @btree: bmap struct of btree
637  * @path: array of nilfs_btree_path struct
638  * @minlevel: start level
639  * @nextkey: place to store the next valid key
640  *
641  * Return Value: If a next key was found, 0 is returned. Otherwise,
642  * -ENOENT is returned.
643  */
644 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree,
645                                     const struct nilfs_btree_path *path,
646                                     int minlevel, __u64 *nextkey)
647 {
648         struct nilfs_btree_node *node;
649         int maxlevel = nilfs_btree_height(btree) - 1;
650         int index, next_adj, level;
651
652         /* Next index is already set to bp_index for leaf nodes. */
653         next_adj = 0;
654         for (level = minlevel; level <= maxlevel; level++) {
655                 if (level == maxlevel)
656                         node = nilfs_btree_get_root(btree);
657                 else
658                         node = nilfs_btree_get_nonroot_node(path, level);
659
660                 index = path[level].bp_index + next_adj;
661                 if (index < nilfs_btree_node_get_nchildren(node)) {
662                         /* Next key is in this node */
663                         *nextkey = nilfs_btree_node_get_key(node, index);
664                         return 0;
665                 }
666                 /* For non-leaf nodes, next index is stored at bp_index + 1. */
667                 next_adj = 1;
668         }
669         return -ENOENT;
670 }
671
672 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
673                               __u64 key, int level, __u64 *ptrp)
674 {
675         struct nilfs_btree_path *path;
676         int ret;
677
678         path = nilfs_btree_alloc_path();
679         if (path == NULL)
680                 return -ENOMEM;
681
682         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
683
684         nilfs_btree_free_path(path);
685
686         return ret;
687 }
688
689 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
690                                      __u64 key, __u64 *ptrp,
691                                      unsigned int maxblocks)
692 {
693         struct nilfs_btree_path *path;
694         struct nilfs_btree_node *node;
695         struct inode *dat = NULL;
696         __u64 ptr, ptr2;
697         sector_t blocknr;
698         int level = NILFS_BTREE_LEVEL_NODE_MIN;
699         int ret, cnt, index, maxlevel, ncmax;
700         struct nilfs_btree_readahead_info p;
701
702         path = nilfs_btree_alloc_path();
703         if (path == NULL)
704                 return -ENOMEM;
705
706         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
707         if (ret < 0)
708                 goto out;
709
710         if (NILFS_BMAP_USE_VBN(btree)) {
711                 dat = nilfs_bmap_get_dat(btree);
712                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
713                 if (ret < 0)
714                         goto out;
715                 ptr = blocknr;
716         }
717         cnt = 1;
718         if (cnt == maxblocks)
719                 goto end;
720
721         maxlevel = nilfs_btree_height(btree) - 1;
722         node = nilfs_btree_get_node(btree, path, level, &ncmax);
723         index = path[level].bp_index + 1;
724         for (;;) {
725                 while (index < nilfs_btree_node_get_nchildren(node)) {
726                         if (nilfs_btree_node_get_key(node, index) !=
727                             key + cnt)
728                                 goto end;
729                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
730                         if (dat) {
731                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
732                                 if (ret < 0)
733                                         goto out;
734                                 ptr2 = blocknr;
735                         }
736                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
737                                 goto end;
738                         index++;
739                         continue;
740                 }
741                 if (level == maxlevel)
742                         break;
743
744                 /* look-up right sibling node */
745                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
746                 p.index = path[level + 1].bp_index + 1;
747                 p.max_ra_blocks = 7;
748                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
749                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
750                         break;
751                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
752                 path[level + 1].bp_index = p.index;
753
754                 brelse(path[level].bp_bh);
755                 path[level].bp_bh = NULL;
756
757                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
758                                               &p);
759                 if (ret < 0)
760                         goto out;
761                 node = nilfs_btree_get_nonroot_node(path, level);
762                 ncmax = nilfs_btree_nchildren_per_block(btree);
763                 index = 0;
764                 path[level].bp_index = index;
765         }
766  end:
767         *ptrp = ptr;
768         ret = cnt;
769  out:
770         nilfs_btree_free_path(path);
771         return ret;
772 }
773
774 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
775                                     struct nilfs_btree_path *path,
776                                     int level, __u64 key)
777 {
778         if (level < nilfs_btree_height(btree) - 1) {
779                 do {
780                         nilfs_btree_node_set_key(
781                                 nilfs_btree_get_nonroot_node(path, level),
782                                 path[level].bp_index, key);
783                         if (!buffer_dirty(path[level].bp_bh))
784                                 mark_buffer_dirty(path[level].bp_bh);
785                 } while ((path[level].bp_index == 0) &&
786                          (++level < nilfs_btree_height(btree) - 1));
787         }
788
789         /* root */
790         if (level == nilfs_btree_height(btree) - 1) {
791                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
792                                          path[level].bp_index, key);
793         }
794 }
795
796 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
797                                   struct nilfs_btree_path *path,
798                                   int level, __u64 *keyp, __u64 *ptrp)
799 {
800         struct nilfs_btree_node *node;
801         int ncblk;
802
803         if (level < nilfs_btree_height(btree) - 1) {
804                 node = nilfs_btree_get_nonroot_node(path, level);
805                 ncblk = nilfs_btree_nchildren_per_block(btree);
806                 nilfs_btree_node_insert(node, path[level].bp_index,
807                                         *keyp, *ptrp, ncblk);
808                 if (!buffer_dirty(path[level].bp_bh))
809                         mark_buffer_dirty(path[level].bp_bh);
810
811                 if (path[level].bp_index == 0)
812                         nilfs_btree_promote_key(btree, path, level + 1,
813                                                 nilfs_btree_node_get_key(node,
814                                                                          0));
815         } else {
816                 node = nilfs_btree_get_root(btree);
817                 nilfs_btree_node_insert(node, path[level].bp_index,
818                                         *keyp, *ptrp,
819                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
820         }
821 }
822
823 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
824                                    struct nilfs_btree_path *path,
825                                    int level, __u64 *keyp, __u64 *ptrp)
826 {
827         struct nilfs_btree_node *node, *left;
828         int nchildren, lnchildren, n, move, ncblk;
829
830         node = nilfs_btree_get_nonroot_node(path, level);
831         left = nilfs_btree_get_sib_node(path, level);
832         nchildren = nilfs_btree_node_get_nchildren(node);
833         lnchildren = nilfs_btree_node_get_nchildren(left);
834         ncblk = nilfs_btree_nchildren_per_block(btree);
835         move = 0;
836
837         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
838         if (n > path[level].bp_index) {
839                 /* move insert point */
840                 n--;
841                 move = 1;
842         }
843
844         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
845
846         if (!buffer_dirty(path[level].bp_bh))
847                 mark_buffer_dirty(path[level].bp_bh);
848         if (!buffer_dirty(path[level].bp_sib_bh))
849                 mark_buffer_dirty(path[level].bp_sib_bh);
850
851         nilfs_btree_promote_key(btree, path, level + 1,
852                                 nilfs_btree_node_get_key(node, 0));
853
854         if (move) {
855                 brelse(path[level].bp_bh);
856                 path[level].bp_bh = path[level].bp_sib_bh;
857                 path[level].bp_sib_bh = NULL;
858                 path[level].bp_index += lnchildren;
859                 path[level + 1].bp_index--;
860         } else {
861                 brelse(path[level].bp_sib_bh);
862                 path[level].bp_sib_bh = NULL;
863                 path[level].bp_index -= n;
864         }
865
866         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
867 }
868
869 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
870                                     struct nilfs_btree_path *path,
871                                     int level, __u64 *keyp, __u64 *ptrp)
872 {
873         struct nilfs_btree_node *node, *right;
874         int nchildren, rnchildren, n, move, ncblk;
875
876         node = nilfs_btree_get_nonroot_node(path, level);
877         right = nilfs_btree_get_sib_node(path, level);
878         nchildren = nilfs_btree_node_get_nchildren(node);
879         rnchildren = nilfs_btree_node_get_nchildren(right);
880         ncblk = nilfs_btree_nchildren_per_block(btree);
881         move = 0;
882
883         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
884         if (n > nchildren - path[level].bp_index) {
885                 /* move insert point */
886                 n--;
887                 move = 1;
888         }
889
890         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
891
892         if (!buffer_dirty(path[level].bp_bh))
893                 mark_buffer_dirty(path[level].bp_bh);
894         if (!buffer_dirty(path[level].bp_sib_bh))
895                 mark_buffer_dirty(path[level].bp_sib_bh);
896
897         path[level + 1].bp_index++;
898         nilfs_btree_promote_key(btree, path, level + 1,
899                                 nilfs_btree_node_get_key(right, 0));
900         path[level + 1].bp_index--;
901
902         if (move) {
903                 brelse(path[level].bp_bh);
904                 path[level].bp_bh = path[level].bp_sib_bh;
905                 path[level].bp_sib_bh = NULL;
906                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
907                 path[level + 1].bp_index++;
908         } else {
909                 brelse(path[level].bp_sib_bh);
910                 path[level].bp_sib_bh = NULL;
911         }
912
913         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
914 }
915
916 static void nilfs_btree_split(struct nilfs_bmap *btree,
917                               struct nilfs_btree_path *path,
918                               int level, __u64 *keyp, __u64 *ptrp)
919 {
920         struct nilfs_btree_node *node, *right;
921         int nchildren, n, move, ncblk;
922
923         node = nilfs_btree_get_nonroot_node(path, level);
924         right = nilfs_btree_get_sib_node(path, level);
925         nchildren = nilfs_btree_node_get_nchildren(node);
926         ncblk = nilfs_btree_nchildren_per_block(btree);
927         move = 0;
928
929         n = (nchildren + 1) / 2;
930         if (n > nchildren - path[level].bp_index) {
931                 n--;
932                 move = 1;
933         }
934
935         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
936
937         if (!buffer_dirty(path[level].bp_bh))
938                 mark_buffer_dirty(path[level].bp_bh);
939         if (!buffer_dirty(path[level].bp_sib_bh))
940                 mark_buffer_dirty(path[level].bp_sib_bh);
941
942         if (move) {
943                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
944                 nilfs_btree_node_insert(right, path[level].bp_index,
945                                         *keyp, *ptrp, ncblk);
946
947                 *keyp = nilfs_btree_node_get_key(right, 0);
948                 *ptrp = path[level].bp_newreq.bpr_ptr;
949
950                 brelse(path[level].bp_bh);
951                 path[level].bp_bh = path[level].bp_sib_bh;
952                 path[level].bp_sib_bh = NULL;
953         } else {
954                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
955
956                 *keyp = nilfs_btree_node_get_key(right, 0);
957                 *ptrp = path[level].bp_newreq.bpr_ptr;
958
959                 brelse(path[level].bp_sib_bh);
960                 path[level].bp_sib_bh = NULL;
961         }
962
963         path[level + 1].bp_index++;
964 }
965
966 static void nilfs_btree_grow(struct nilfs_bmap *btree,
967                              struct nilfs_btree_path *path,
968                              int level, __u64 *keyp, __u64 *ptrp)
969 {
970         struct nilfs_btree_node *root, *child;
971         int n, ncblk;
972
973         root = nilfs_btree_get_root(btree);
974         child = nilfs_btree_get_sib_node(path, level);
975         ncblk = nilfs_btree_nchildren_per_block(btree);
976
977         n = nilfs_btree_node_get_nchildren(root);
978
979         nilfs_btree_node_move_right(root, child, n,
980                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
981         nilfs_btree_node_set_level(root, level + 1);
982
983         if (!buffer_dirty(path[level].bp_sib_bh))
984                 mark_buffer_dirty(path[level].bp_sib_bh);
985
986         path[level].bp_bh = path[level].bp_sib_bh;
987         path[level].bp_sib_bh = NULL;
988
989         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
990
991         *keyp = nilfs_btree_node_get_key(child, 0);
992         *ptrp = path[level].bp_newreq.bpr_ptr;
993 }
994
995 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
996                                    const struct nilfs_btree_path *path)
997 {
998         struct nilfs_btree_node *node;
999         int level, ncmax;
1000
1001         if (path == NULL)
1002                 return NILFS_BMAP_INVALID_PTR;
1003
1004         /* left sibling */
1005         level = NILFS_BTREE_LEVEL_NODE_MIN;
1006         if (path[level].bp_index > 0) {
1007                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1008                 return nilfs_btree_node_get_ptr(node,
1009                                                 path[level].bp_index - 1,
1010                                                 ncmax);
1011         }
1012
1013         /* parent */
1014         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
1015         if (level <= nilfs_btree_height(btree) - 1) {
1016                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
1017                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
1018                                                 ncmax);
1019         }
1020
1021         return NILFS_BMAP_INVALID_PTR;
1022 }
1023
1024 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
1025                                        const struct nilfs_btree_path *path,
1026                                        __u64 key)
1027 {
1028         __u64 ptr;
1029
1030         ptr = nilfs_bmap_find_target_seq(btree, key);
1031         if (ptr != NILFS_BMAP_INVALID_PTR)
1032                 /* sequential access */
1033                 return ptr;
1034
1035         ptr = nilfs_btree_find_near(btree, path);
1036         if (ptr != NILFS_BMAP_INVALID_PTR)
1037                 /* near */
1038                 return ptr;
1039
1040         /* block group */
1041         return nilfs_bmap_find_target_in_group(btree);
1042 }
1043
1044 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
1045                                       struct nilfs_btree_path *path,
1046                                       int *levelp, __u64 key, __u64 ptr,
1047                                       struct nilfs_bmap_stats *stats)
1048 {
1049         struct buffer_head *bh;
1050         struct nilfs_btree_node *node, *parent, *sib;
1051         __u64 sibptr;
1052         int pindex, level, ncmax, ncblk, ret;
1053         struct inode *dat = NULL;
1054
1055         stats->bs_nblocks = 0;
1056         level = NILFS_BTREE_LEVEL_DATA;
1057
1058         /* allocate a new ptr for data block */
1059         if (NILFS_BMAP_USE_VBN(btree)) {
1060                 path[level].bp_newreq.bpr_ptr =
1061                         nilfs_btree_find_target_v(btree, path, key);
1062                 dat = nilfs_bmap_get_dat(btree);
1063         }
1064
1065         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1066         if (ret < 0)
1067                 goto err_out_data;
1068
1069         ncblk = nilfs_btree_nchildren_per_block(btree);
1070
1071         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1072              level < nilfs_btree_height(btree) - 1;
1073              level++) {
1074                 node = nilfs_btree_get_nonroot_node(path, level);
1075                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1076                         path[level].bp_op = nilfs_btree_do_insert;
1077                         stats->bs_nblocks++;
1078                         goto out;
1079                 }
1080
1081                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1082                 pindex = path[level + 1].bp_index;
1083
1084                 /* left sibling */
1085                 if (pindex > 0) {
1086                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1087                                                           ncmax);
1088                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1089                         if (ret < 0)
1090                                 goto err_out_child_node;
1091                         sib = (struct nilfs_btree_node *)bh->b_data;
1092                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1093                                 path[level].bp_sib_bh = bh;
1094                                 path[level].bp_op = nilfs_btree_carry_left;
1095                                 stats->bs_nblocks++;
1096                                 goto out;
1097                         } else {
1098                                 brelse(bh);
1099                         }
1100                 }
1101
1102                 /* right sibling */
1103                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1104                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1105                                                           ncmax);
1106                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1107                         if (ret < 0)
1108                                 goto err_out_child_node;
1109                         sib = (struct nilfs_btree_node *)bh->b_data;
1110                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1111                                 path[level].bp_sib_bh = bh;
1112                                 path[level].bp_op = nilfs_btree_carry_right;
1113                                 stats->bs_nblocks++;
1114                                 goto out;
1115                         } else {
1116                                 brelse(bh);
1117                         }
1118                 }
1119
1120                 /* split */
1121                 path[level].bp_newreq.bpr_ptr =
1122                         path[level - 1].bp_newreq.bpr_ptr + 1;
1123                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1124                                                    &path[level].bp_newreq, dat);
1125                 if (ret < 0)
1126                         goto err_out_child_node;
1127                 ret = nilfs_btree_get_new_block(btree,
1128                                                 path[level].bp_newreq.bpr_ptr,
1129                                                 &bh);
1130                 if (ret < 0)
1131                         goto err_out_curr_node;
1132
1133                 stats->bs_nblocks++;
1134
1135                 sib = (struct nilfs_btree_node *)bh->b_data;
1136                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1137                 path[level].bp_sib_bh = bh;
1138                 path[level].bp_op = nilfs_btree_split;
1139         }
1140
1141         /* root */
1142         node = nilfs_btree_get_root(btree);
1143         if (nilfs_btree_node_get_nchildren(node) <
1144             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1145                 path[level].bp_op = nilfs_btree_do_insert;
1146                 stats->bs_nblocks++;
1147                 goto out;
1148         }
1149
1150         /* grow */
1151         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1152         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1153         if (ret < 0)
1154                 goto err_out_child_node;
1155         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1156                                         &bh);
1157         if (ret < 0)
1158                 goto err_out_curr_node;
1159
1160         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1161                               0, level, 0, ncblk, NULL, NULL);
1162         path[level].bp_sib_bh = bh;
1163         path[level].bp_op = nilfs_btree_grow;
1164
1165         level++;
1166         path[level].bp_op = nilfs_btree_do_insert;
1167
1168         /* a newly-created node block and a data block are added */
1169         stats->bs_nblocks += 2;
1170
1171         /* success */
1172  out:
1173         *levelp = level;
1174         return ret;
1175
1176         /* error */
1177  err_out_curr_node:
1178         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1179  err_out_child_node:
1180         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1181                 nilfs_btnode_delete(path[level].bp_sib_bh);
1182                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1183
1184         }
1185
1186         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1187  err_out_data:
1188         *levelp = level;
1189         stats->bs_nblocks = 0;
1190         return ret;
1191 }
1192
1193 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1194                                       struct nilfs_btree_path *path,
1195                                       int maxlevel, __u64 key, __u64 ptr)
1196 {
1197         struct inode *dat = NULL;
1198         int level;
1199
1200         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1201         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1202         if (NILFS_BMAP_USE_VBN(btree)) {
1203                 nilfs_bmap_set_target_v(btree, key, ptr);
1204                 dat = nilfs_bmap_get_dat(btree);
1205         }
1206
1207         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1208                 nilfs_bmap_commit_alloc_ptr(btree,
1209                                             &path[level - 1].bp_newreq, dat);
1210                 path[level].bp_op(btree, path, level, &key, &ptr);
1211         }
1212
1213         if (!nilfs_bmap_dirty(btree))
1214                 nilfs_bmap_set_dirty(btree);
1215 }
1216
1217 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1218 {
1219         struct nilfs_btree_path *path;
1220         struct nilfs_bmap_stats stats;
1221         int level, ret;
1222
1223         path = nilfs_btree_alloc_path();
1224         if (path == NULL)
1225                 return -ENOMEM;
1226
1227         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1228                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1229         if (ret != -ENOENT) {
1230                 if (ret == 0)
1231                         ret = -EEXIST;
1232                 goto out;
1233         }
1234
1235         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1236         if (ret < 0)
1237                 goto out;
1238         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1239         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1240
1241  out:
1242         nilfs_btree_free_path(path);
1243         return ret;
1244 }
1245
1246 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1247                                   struct nilfs_btree_path *path,
1248                                   int level, __u64 *keyp, __u64 *ptrp)
1249 {
1250         struct nilfs_btree_node *node;
1251         int ncblk;
1252
1253         if (level < nilfs_btree_height(btree) - 1) {
1254                 node = nilfs_btree_get_nonroot_node(path, level);
1255                 ncblk = nilfs_btree_nchildren_per_block(btree);
1256                 nilfs_btree_node_delete(node, path[level].bp_index,
1257                                         keyp, ptrp, ncblk);
1258                 if (!buffer_dirty(path[level].bp_bh))
1259                         mark_buffer_dirty(path[level].bp_bh);
1260                 if (path[level].bp_index == 0)
1261                         nilfs_btree_promote_key(btree, path, level + 1,
1262                                 nilfs_btree_node_get_key(node, 0));
1263         } else {
1264                 node = nilfs_btree_get_root(btree);
1265                 nilfs_btree_node_delete(node, path[level].bp_index,
1266                                         keyp, ptrp,
1267                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1268         }
1269 }
1270
1271 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1272                                     struct nilfs_btree_path *path,
1273                                     int level, __u64 *keyp, __u64 *ptrp)
1274 {
1275         struct nilfs_btree_node *node, *left;
1276         int nchildren, lnchildren, n, ncblk;
1277
1278         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1279
1280         node = nilfs_btree_get_nonroot_node(path, level);
1281         left = nilfs_btree_get_sib_node(path, level);
1282         nchildren = nilfs_btree_node_get_nchildren(node);
1283         lnchildren = nilfs_btree_node_get_nchildren(left);
1284         ncblk = nilfs_btree_nchildren_per_block(btree);
1285
1286         n = (nchildren + lnchildren) / 2 - nchildren;
1287
1288         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1289
1290         if (!buffer_dirty(path[level].bp_bh))
1291                 mark_buffer_dirty(path[level].bp_bh);
1292         if (!buffer_dirty(path[level].bp_sib_bh))
1293                 mark_buffer_dirty(path[level].bp_sib_bh);
1294
1295         nilfs_btree_promote_key(btree, path, level + 1,
1296                                 nilfs_btree_node_get_key(node, 0));
1297
1298         brelse(path[level].bp_sib_bh);
1299         path[level].bp_sib_bh = NULL;
1300         path[level].bp_index += n;
1301 }
1302
1303 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1304                                      struct nilfs_btree_path *path,
1305                                      int level, __u64 *keyp, __u64 *ptrp)
1306 {
1307         struct nilfs_btree_node *node, *right;
1308         int nchildren, rnchildren, n, ncblk;
1309
1310         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1311
1312         node = nilfs_btree_get_nonroot_node(path, level);
1313         right = nilfs_btree_get_sib_node(path, level);
1314         nchildren = nilfs_btree_node_get_nchildren(node);
1315         rnchildren = nilfs_btree_node_get_nchildren(right);
1316         ncblk = nilfs_btree_nchildren_per_block(btree);
1317
1318         n = (nchildren + rnchildren) / 2 - nchildren;
1319
1320         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1321
1322         if (!buffer_dirty(path[level].bp_bh))
1323                 mark_buffer_dirty(path[level].bp_bh);
1324         if (!buffer_dirty(path[level].bp_sib_bh))
1325                 mark_buffer_dirty(path[level].bp_sib_bh);
1326
1327         path[level + 1].bp_index++;
1328         nilfs_btree_promote_key(btree, path, level + 1,
1329                                 nilfs_btree_node_get_key(right, 0));
1330         path[level + 1].bp_index--;
1331
1332         brelse(path[level].bp_sib_bh);
1333         path[level].bp_sib_bh = NULL;
1334 }
1335
1336 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1337                                     struct nilfs_btree_path *path,
1338                                     int level, __u64 *keyp, __u64 *ptrp)
1339 {
1340         struct nilfs_btree_node *node, *left;
1341         int n, ncblk;
1342
1343         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1344
1345         node = nilfs_btree_get_nonroot_node(path, level);
1346         left = nilfs_btree_get_sib_node(path, level);
1347         ncblk = nilfs_btree_nchildren_per_block(btree);
1348
1349         n = nilfs_btree_node_get_nchildren(node);
1350
1351         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1352
1353         if (!buffer_dirty(path[level].bp_sib_bh))
1354                 mark_buffer_dirty(path[level].bp_sib_bh);
1355
1356         nilfs_btnode_delete(path[level].bp_bh);
1357         path[level].bp_bh = path[level].bp_sib_bh;
1358         path[level].bp_sib_bh = NULL;
1359         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1360 }
1361
1362 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1363                                      struct nilfs_btree_path *path,
1364                                      int level, __u64 *keyp, __u64 *ptrp)
1365 {
1366         struct nilfs_btree_node *node, *right;
1367         int n, ncblk;
1368
1369         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1370
1371         node = nilfs_btree_get_nonroot_node(path, level);
1372         right = nilfs_btree_get_sib_node(path, level);
1373         ncblk = nilfs_btree_nchildren_per_block(btree);
1374
1375         n = nilfs_btree_node_get_nchildren(right);
1376
1377         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1378
1379         if (!buffer_dirty(path[level].bp_bh))
1380                 mark_buffer_dirty(path[level].bp_bh);
1381
1382         nilfs_btnode_delete(path[level].bp_sib_bh);
1383         path[level].bp_sib_bh = NULL;
1384         path[level + 1].bp_index++;
1385 }
1386
1387 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1388                                struct nilfs_btree_path *path,
1389                                int level, __u64 *keyp, __u64 *ptrp)
1390 {
1391         struct nilfs_btree_node *root, *child;
1392         int n, ncblk;
1393
1394         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1395
1396         root = nilfs_btree_get_root(btree);
1397         child = nilfs_btree_get_nonroot_node(path, level);
1398         ncblk = nilfs_btree_nchildren_per_block(btree);
1399
1400         nilfs_btree_node_delete(root, 0, NULL, NULL,
1401                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1402         nilfs_btree_node_set_level(root, level);
1403         n = nilfs_btree_node_get_nchildren(child);
1404         nilfs_btree_node_move_left(root, child, n,
1405                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1406
1407         nilfs_btnode_delete(path[level].bp_bh);
1408         path[level].bp_bh = NULL;
1409 }
1410
1411 static void nilfs_btree_nop(struct nilfs_bmap *btree,
1412                             struct nilfs_btree_path *path,
1413                             int level, __u64 *keyp, __u64 *ptrp)
1414 {
1415 }
1416
1417 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1418                                       struct nilfs_btree_path *path,
1419                                       int *levelp,
1420                                       struct nilfs_bmap_stats *stats,
1421                                       struct inode *dat)
1422 {
1423         struct buffer_head *bh;
1424         struct nilfs_btree_node *node, *parent, *sib;
1425         __u64 sibptr;
1426         int pindex, dindex, level, ncmin, ncmax, ncblk, ret;
1427
1428         ret = 0;
1429         stats->bs_nblocks = 0;
1430         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1431         ncblk = nilfs_btree_nchildren_per_block(btree);
1432
1433         for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index;
1434              level < nilfs_btree_height(btree) - 1;
1435              level++) {
1436                 node = nilfs_btree_get_nonroot_node(path, level);
1437                 path[level].bp_oldreq.bpr_ptr =
1438                         nilfs_btree_node_get_ptr(node, dindex, ncblk);
1439                 ret = nilfs_bmap_prepare_end_ptr(btree,
1440                                                  &path[level].bp_oldreq, dat);
1441                 if (ret < 0)
1442                         goto err_out_child_node;
1443
1444                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1445                         path[level].bp_op = nilfs_btree_do_delete;
1446                         stats->bs_nblocks++;
1447                         goto out;
1448                 }
1449
1450                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1451                 pindex = path[level + 1].bp_index;
1452                 dindex = pindex;
1453
1454                 if (pindex > 0) {
1455                         /* left sibling */
1456                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1457                                                           ncmax);
1458                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1459                         if (ret < 0)
1460                                 goto err_out_curr_node;
1461                         sib = (struct nilfs_btree_node *)bh->b_data;
1462                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1463                                 path[level].bp_sib_bh = bh;
1464                                 path[level].bp_op = nilfs_btree_borrow_left;
1465                                 stats->bs_nblocks++;
1466                                 goto out;
1467                         } else {
1468                                 path[level].bp_sib_bh = bh;
1469                                 path[level].bp_op = nilfs_btree_concat_left;
1470                                 stats->bs_nblocks++;
1471                                 /* continue; */
1472                         }
1473                 } else if (pindex <
1474                            nilfs_btree_node_get_nchildren(parent) - 1) {
1475                         /* right sibling */
1476                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1477                                                           ncmax);
1478                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1479                         if (ret < 0)
1480                                 goto err_out_curr_node;
1481                         sib = (struct nilfs_btree_node *)bh->b_data;
1482                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1483                                 path[level].bp_sib_bh = bh;
1484                                 path[level].bp_op = nilfs_btree_borrow_right;
1485                                 stats->bs_nblocks++;
1486                                 goto out;
1487                         } else {
1488                                 path[level].bp_sib_bh = bh;
1489                                 path[level].bp_op = nilfs_btree_concat_right;
1490                                 stats->bs_nblocks++;
1491                                 /*
1492                                  * When merging right sibling node
1493                                  * into the current node, pointer to
1494                                  * the right sibling node must be
1495                                  * terminated instead.  The adjustment
1496                                  * below is required for that.
1497                                  */
1498                                 dindex = pindex + 1;
1499                                 /* continue; */
1500                         }
1501                 } else {
1502                         /* no siblings */
1503                         /* the only child of the root node */
1504                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1505                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1506                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1507                                 path[level].bp_op = nilfs_btree_shrink;
1508                                 stats->bs_nblocks += 2;
1509                                 level++;
1510                                 path[level].bp_op = nilfs_btree_nop;
1511                                 goto shrink_root_child;
1512                         } else {
1513                                 path[level].bp_op = nilfs_btree_do_delete;
1514                                 stats->bs_nblocks++;
1515                                 goto out;
1516                         }
1517                 }
1518         }
1519
1520         /* child of the root node is deleted */
1521         path[level].bp_op = nilfs_btree_do_delete;
1522         stats->bs_nblocks++;
1523
1524 shrink_root_child:
1525         node = nilfs_btree_get_root(btree);
1526         path[level].bp_oldreq.bpr_ptr =
1527                 nilfs_btree_node_get_ptr(node, dindex,
1528                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1529
1530         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1531         if (ret < 0)
1532                 goto err_out_child_node;
1533
1534         /* success */
1535  out:
1536         *levelp = level;
1537         return ret;
1538
1539         /* error */
1540  err_out_curr_node:
1541         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1542  err_out_child_node:
1543         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1544                 brelse(path[level].bp_sib_bh);
1545                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1546         }
1547         *levelp = level;
1548         stats->bs_nblocks = 0;
1549         return ret;
1550 }
1551
1552 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1553                                       struct nilfs_btree_path *path,
1554                                       int maxlevel, struct inode *dat)
1555 {
1556         int level;
1557
1558         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1559                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1560                 path[level].bp_op(btree, path, level, NULL, NULL);
1561         }
1562
1563         if (!nilfs_bmap_dirty(btree))
1564                 nilfs_bmap_set_dirty(btree);
1565 }
1566
1567 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1568
1569 {
1570         struct nilfs_btree_path *path;
1571         struct nilfs_bmap_stats stats;
1572         struct inode *dat;
1573         int level, ret;
1574
1575         path = nilfs_btree_alloc_path();
1576         if (path == NULL)
1577                 return -ENOMEM;
1578
1579         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1580                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1581         if (ret < 0)
1582                 goto out;
1583
1584
1585         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1586
1587         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1588         if (ret < 0)
1589                 goto out;
1590         nilfs_btree_commit_delete(btree, path, level, dat);
1591         nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks);
1592
1593 out:
1594         nilfs_btree_free_path(path);
1595         return ret;
1596 }
1597
1598 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start,
1599                                 __u64 *keyp)
1600 {
1601         struct nilfs_btree_path *path;
1602         const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN;
1603         int ret;
1604
1605         path = nilfs_btree_alloc_path();
1606         if (!path)
1607                 return -ENOMEM;
1608
1609         ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0);
1610         if (!ret)
1611                 *keyp = start;
1612         else if (ret == -ENOENT)
1613                 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp);
1614
1615         nilfs_btree_free_path(path);
1616         return ret;
1617 }
1618
1619 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1620 {
1621         struct nilfs_btree_path *path;
1622         int ret;
1623
1624         path = nilfs_btree_alloc_path();
1625         if (path == NULL)
1626                 return -ENOMEM;
1627
1628         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1629
1630         nilfs_btree_free_path(path);
1631
1632         return ret;
1633 }
1634
1635 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1636 {
1637         struct buffer_head *bh;
1638         struct nilfs_btree_node *root, *node;
1639         __u64 maxkey, nextmaxkey;
1640         __u64 ptr;
1641         int nchildren, ret;
1642
1643         root = nilfs_btree_get_root(btree);
1644         switch (nilfs_btree_height(btree)) {
1645         case 2:
1646                 bh = NULL;
1647                 node = root;
1648                 break;
1649         case 3:
1650                 nchildren = nilfs_btree_node_get_nchildren(root);
1651                 if (nchildren > 1)
1652                         return 0;
1653                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1654                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1655                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1656                 if (ret < 0)
1657                         return ret;
1658                 node = (struct nilfs_btree_node *)bh->b_data;
1659                 break;
1660         default:
1661                 return 0;
1662         }
1663
1664         nchildren = nilfs_btree_node_get_nchildren(node);
1665         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1666         nextmaxkey = (nchildren > 1) ?
1667                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1668         if (bh != NULL)
1669                 brelse(bh);
1670
1671         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1672 }
1673
1674 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1675                                    __u64 *keys, __u64 *ptrs, int nitems)
1676 {
1677         struct buffer_head *bh;
1678         struct nilfs_btree_node *node, *root;
1679         __le64 *dkeys;
1680         __le64 *dptrs;
1681         __u64 ptr;
1682         int nchildren, ncmax, i, ret;
1683
1684         root = nilfs_btree_get_root(btree);
1685         switch (nilfs_btree_height(btree)) {
1686         case 2:
1687                 bh = NULL;
1688                 node = root;
1689                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1690                 break;
1691         case 3:
1692                 nchildren = nilfs_btree_node_get_nchildren(root);
1693                 WARN_ON(nchildren > 1);
1694                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1695                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1696                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1697                 if (ret < 0)
1698                         return ret;
1699                 node = (struct nilfs_btree_node *)bh->b_data;
1700                 ncmax = nilfs_btree_nchildren_per_block(btree);
1701                 break;
1702         default:
1703                 node = NULL;
1704                 return -EINVAL;
1705         }
1706
1707         nchildren = nilfs_btree_node_get_nchildren(node);
1708         if (nchildren < nitems)
1709                 nitems = nchildren;
1710         dkeys = nilfs_btree_node_dkeys(node);
1711         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1712         for (i = 0; i < nitems; i++) {
1713                 keys[i] = le64_to_cpu(dkeys[i]);
1714                 ptrs[i] = le64_to_cpu(dptrs[i]);
1715         }
1716
1717         if (bh != NULL)
1718                 brelse(bh);
1719
1720         return nitems;
1721 }
1722
1723 static int
1724 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1725                                        union nilfs_bmap_ptr_req *dreq,
1726                                        union nilfs_bmap_ptr_req *nreq,
1727                                        struct buffer_head **bhp,
1728                                        struct nilfs_bmap_stats *stats)
1729 {
1730         struct buffer_head *bh;
1731         struct inode *dat = NULL;
1732         int ret;
1733
1734         stats->bs_nblocks = 0;
1735
1736         /* for data */
1737         /* cannot find near ptr */
1738         if (NILFS_BMAP_USE_VBN(btree)) {
1739                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1740                 dat = nilfs_bmap_get_dat(btree);
1741         }
1742
1743         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1744         if (ret < 0)
1745                 return ret;
1746
1747         *bhp = NULL;
1748         stats->bs_nblocks++;
1749         if (nreq != NULL) {
1750                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1751                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1752                 if (ret < 0)
1753                         goto err_out_dreq;
1754
1755                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1756                 if (ret < 0)
1757                         goto err_out_nreq;
1758
1759                 *bhp = bh;
1760                 stats->bs_nblocks++;
1761         }
1762
1763         /* success */
1764         return 0;
1765
1766         /* error */
1767  err_out_nreq:
1768         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1769  err_out_dreq:
1770         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1771         stats->bs_nblocks = 0;
1772         return ret;
1773
1774 }
1775
1776 static void
1777 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1778                                       __u64 key, __u64 ptr,
1779                                       const __u64 *keys, const __u64 *ptrs,
1780                                       int n,
1781                                       union nilfs_bmap_ptr_req *dreq,
1782                                       union nilfs_bmap_ptr_req *nreq,
1783                                       struct buffer_head *bh)
1784 {
1785         struct nilfs_btree_node *node;
1786         struct inode *dat;
1787         __u64 tmpptr;
1788         int ncblk;
1789
1790         /* free resources */
1791         if (btree->b_ops->bop_clear != NULL)
1792                 btree->b_ops->bop_clear(btree);
1793
1794         /* ptr must be a pointer to a buffer head. */
1795         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1796
1797         /* convert and insert */
1798         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1799         __nilfs_btree_init(btree);
1800         if (nreq != NULL) {
1801                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1802                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1803
1804                 /* create child node at level 1 */
1805                 node = (struct nilfs_btree_node *)bh->b_data;
1806                 ncblk = nilfs_btree_nchildren_per_block(btree);
1807                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1808                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1809                 if (!buffer_dirty(bh))
1810                         mark_buffer_dirty(bh);
1811                 if (!nilfs_bmap_dirty(btree))
1812                         nilfs_bmap_set_dirty(btree);
1813
1814                 brelse(bh);
1815
1816                 /* create root node at level 2 */
1817                 node = nilfs_btree_get_root(btree);
1818                 tmpptr = nreq->bpr_ptr;
1819                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1820                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1821                                       &keys[0], &tmpptr);
1822         } else {
1823                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1824
1825                 /* create root node at level 1 */
1826                 node = nilfs_btree_get_root(btree);
1827                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1828                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1829                                       keys, ptrs);
1830                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1831                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1832                 if (!nilfs_bmap_dirty(btree))
1833                         nilfs_bmap_set_dirty(btree);
1834         }
1835
1836         if (NILFS_BMAP_USE_VBN(btree))
1837                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1838 }
1839
1840 /**
1841  * nilfs_btree_convert_and_insert -
1842  * @bmap:
1843  * @key:
1844  * @ptr:
1845  * @keys:
1846  * @ptrs:
1847  * @n:
1848  */
1849 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1850                                    __u64 key, __u64 ptr,
1851                                    const __u64 *keys, const __u64 *ptrs, int n)
1852 {
1853         struct buffer_head *bh = NULL;
1854         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1855         struct nilfs_bmap_stats stats;
1856         int ret;
1857
1858         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1859                 di = &dreq;
1860                 ni = NULL;
1861         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1862                            1 << btree->b_inode->i_blkbits)) {
1863                 di = &dreq;
1864                 ni = &nreq;
1865         } else {
1866                 di = NULL;
1867                 ni = NULL;
1868                 BUG();
1869         }
1870
1871         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1872                                                      &stats);
1873         if (ret < 0)
1874                 return ret;
1875         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1876                                               di, ni, bh);
1877         nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
1878         return 0;
1879 }
1880
1881 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1882                                    struct nilfs_btree_path *path,
1883                                    int level,
1884                                    struct buffer_head *bh)
1885 {
1886         while ((++level < nilfs_btree_height(btree) - 1) &&
1887                !buffer_dirty(path[level].bp_bh))
1888                 mark_buffer_dirty(path[level].bp_bh);
1889
1890         return 0;
1891 }
1892
1893 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1894                                         struct nilfs_btree_path *path,
1895                                         int level, struct inode *dat)
1896 {
1897         struct nilfs_btree_node *parent;
1898         int ncmax, ret;
1899
1900         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1901         path[level].bp_oldreq.bpr_ptr =
1902                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1903                                          ncmax);
1904         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1905         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1906                                        &path[level].bp_newreq.bpr_req);
1907         if (ret < 0)
1908                 return ret;
1909
1910         if (buffer_nilfs_node(path[level].bp_bh)) {
1911                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1912                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1913                 path[level].bp_ctxt.bh = path[level].bp_bh;
1914                 ret = nilfs_btnode_prepare_change_key(
1915                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1916                         &path[level].bp_ctxt);
1917                 if (ret < 0) {
1918                         nilfs_dat_abort_update(dat,
1919                                                &path[level].bp_oldreq.bpr_req,
1920                                                &path[level].bp_newreq.bpr_req);
1921                         return ret;
1922                 }
1923         }
1924
1925         return 0;
1926 }
1927
1928 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1929                                         struct nilfs_btree_path *path,
1930                                         int level, struct inode *dat)
1931 {
1932         struct nilfs_btree_node *parent;
1933         int ncmax;
1934
1935         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1936                                 &path[level].bp_newreq.bpr_req,
1937                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1938
1939         if (buffer_nilfs_node(path[level].bp_bh)) {
1940                 nilfs_btnode_commit_change_key(
1941                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1942                         &path[level].bp_ctxt);
1943                 path[level].bp_bh = path[level].bp_ctxt.bh;
1944         }
1945         set_buffer_nilfs_volatile(path[level].bp_bh);
1946
1947         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1948         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1949                                  path[level].bp_newreq.bpr_ptr, ncmax);
1950 }
1951
1952 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1953                                        struct nilfs_btree_path *path,
1954                                        int level, struct inode *dat)
1955 {
1956         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1957                                &path[level].bp_newreq.bpr_req);
1958         if (buffer_nilfs_node(path[level].bp_bh))
1959                 nilfs_btnode_abort_change_key(
1960                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1961                         &path[level].bp_ctxt);
1962 }
1963
1964 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1965                                            struct nilfs_btree_path *path,
1966                                            int minlevel, int *maxlevelp,
1967                                            struct inode *dat)
1968 {
1969         int level, ret;
1970
1971         level = minlevel;
1972         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1973                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1974                 if (ret < 0)
1975                         return ret;
1976         }
1977         while ((++level < nilfs_btree_height(btree) - 1) &&
1978                !buffer_dirty(path[level].bp_bh)) {
1979
1980                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1981                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1982                 if (ret < 0)
1983                         goto out;
1984         }
1985
1986         /* success */
1987         *maxlevelp = level - 1;
1988         return 0;
1989
1990         /* error */
1991  out:
1992         while (--level > minlevel)
1993                 nilfs_btree_abort_update_v(btree, path, level, dat);
1994         if (!buffer_nilfs_volatile(path[level].bp_bh))
1995                 nilfs_btree_abort_update_v(btree, path, level, dat);
1996         return ret;
1997 }
1998
1999 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
2000                                            struct nilfs_btree_path *path,
2001                                            int minlevel, int maxlevel,
2002                                            struct buffer_head *bh,
2003                                            struct inode *dat)
2004 {
2005         int level;
2006
2007         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
2008                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
2009
2010         for (level = minlevel + 1; level <= maxlevel; level++)
2011                 nilfs_btree_commit_update_v(btree, path, level, dat);
2012 }
2013
2014 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
2015                                    struct nilfs_btree_path *path,
2016                                    int level, struct buffer_head *bh)
2017 {
2018         int maxlevel = 0, ret;
2019         struct nilfs_btree_node *parent;
2020         struct inode *dat = nilfs_bmap_get_dat(btree);
2021         __u64 ptr;
2022         int ncmax;
2023
2024         get_bh(bh);
2025         path[level].bp_bh = bh;
2026         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
2027                                               dat);
2028         if (ret < 0)
2029                 goto out;
2030
2031         if (buffer_nilfs_volatile(path[level].bp_bh)) {
2032                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2033                 ptr = nilfs_btree_node_get_ptr(parent,
2034                                                path[level + 1].bp_index,
2035                                                ncmax);
2036                 ret = nilfs_dat_mark_dirty(dat, ptr);
2037                 if (ret < 0)
2038                         goto out;
2039         }
2040
2041         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
2042
2043  out:
2044         brelse(path[level].bp_bh);
2045         path[level].bp_bh = NULL;
2046         return ret;
2047 }
2048
2049 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
2050                                  struct buffer_head *bh)
2051 {
2052         struct nilfs_btree_path *path;
2053         struct nilfs_btree_node *node;
2054         __u64 key;
2055         int level, ret;
2056
2057         WARN_ON(!buffer_dirty(bh));
2058
2059         path = nilfs_btree_alloc_path();
2060         if (path == NULL)
2061                 return -ENOMEM;
2062
2063         if (buffer_nilfs_node(bh)) {
2064                 node = (struct nilfs_btree_node *)bh->b_data;
2065                 key = nilfs_btree_node_get_key(node, 0);
2066                 level = nilfs_btree_node_get_level(node);
2067         } else {
2068                 key = nilfs_bmap_data_get_key(btree, bh);
2069                 level = NILFS_BTREE_LEVEL_DATA;
2070         }
2071
2072         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2073         if (ret < 0) {
2074                 if (unlikely(ret == -ENOENT))
2075                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
2076                                __func__, (unsigned long long)key, level);
2077                 goto out;
2078         }
2079
2080         ret = NILFS_BMAP_USE_VBN(btree) ?
2081                 nilfs_btree_propagate_v(btree, path, level, bh) :
2082                 nilfs_btree_propagate_p(btree, path, level, bh);
2083
2084  out:
2085         nilfs_btree_free_path(path);
2086
2087         return ret;
2088 }
2089
2090 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
2091                                     struct buffer_head *bh)
2092 {
2093         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
2094 }
2095
2096 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
2097                                          struct list_head *lists,
2098                                          struct buffer_head *bh)
2099 {
2100         struct list_head *head;
2101         struct buffer_head *cbh;
2102         struct nilfs_btree_node *node, *cnode;
2103         __u64 key, ckey;
2104         int level;
2105
2106         get_bh(bh);
2107         node = (struct nilfs_btree_node *)bh->b_data;
2108         key = nilfs_btree_node_get_key(node, 0);
2109         level = nilfs_btree_node_get_level(node);
2110         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2111             level >= NILFS_BTREE_LEVEL_MAX) {
2112                 dump_stack();
2113                 printk(KERN_WARNING
2114                        "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2115                        "blocknr=%llu)\n",
2116                        __func__, level, (unsigned long long)key,
2117                        NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2118                        (unsigned long long)bh->b_blocknr);
2119                 return;
2120         }
2121
2122         list_for_each(head, &lists[level]) {
2123                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2124                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2125                 ckey = nilfs_btree_node_get_key(cnode, 0);
2126                 if (key < ckey)
2127                         break;
2128         }
2129         list_add_tail(&bh->b_assoc_buffers, head);
2130 }
2131
2132 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2133                                              struct list_head *listp)
2134 {
2135         struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2136         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2137         struct pagevec pvec;
2138         struct buffer_head *bh, *head;
2139         pgoff_t index = 0;
2140         int level, i;
2141
2142         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2143              level < NILFS_BTREE_LEVEL_MAX;
2144              level++)
2145                 INIT_LIST_HEAD(&lists[level]);
2146
2147         pagevec_init(&pvec, 0);
2148
2149         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2150                                   PAGEVEC_SIZE)) {
2151                 for (i = 0; i < pagevec_count(&pvec); i++) {
2152                         bh = head = page_buffers(pvec.pages[i]);
2153                         do {
2154                                 if (buffer_dirty(bh))
2155                                         nilfs_btree_add_dirty_buffer(btree,
2156                                                                      lists, bh);
2157                         } while ((bh = bh->b_this_page) != head);
2158                 }
2159                 pagevec_release(&pvec);
2160                 cond_resched();
2161         }
2162
2163         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2164              level < NILFS_BTREE_LEVEL_MAX;
2165              level++)
2166                 list_splice_tail(&lists[level], listp);
2167 }
2168
2169 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2170                                 struct nilfs_btree_path *path,
2171                                 int level,
2172                                 struct buffer_head **bh,
2173                                 sector_t blocknr,
2174                                 union nilfs_binfo *binfo)
2175 {
2176         struct nilfs_btree_node *parent;
2177         __u64 key;
2178         __u64 ptr;
2179         int ncmax, ret;
2180
2181         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2182         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2183                                        ncmax);
2184         if (buffer_nilfs_node(*bh)) {
2185                 path[level].bp_ctxt.oldkey = ptr;
2186                 path[level].bp_ctxt.newkey = blocknr;
2187                 path[level].bp_ctxt.bh = *bh;
2188                 ret = nilfs_btnode_prepare_change_key(
2189                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2190                         &path[level].bp_ctxt);
2191                 if (ret < 0)
2192                         return ret;
2193                 nilfs_btnode_commit_change_key(
2194                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2195                         &path[level].bp_ctxt);
2196                 *bh = path[level].bp_ctxt.bh;
2197         }
2198
2199         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2200                                  ncmax);
2201
2202         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2203         /* on-disk format */
2204         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2205         binfo->bi_dat.bi_level = level;
2206
2207         return 0;
2208 }
2209
2210 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2211                                 struct nilfs_btree_path *path,
2212                                 int level,
2213                                 struct buffer_head **bh,
2214                                 sector_t blocknr,
2215                                 union nilfs_binfo *binfo)
2216 {
2217         struct nilfs_btree_node *parent;
2218         struct inode *dat = nilfs_bmap_get_dat(btree);
2219         __u64 key;
2220         __u64 ptr;
2221         union nilfs_bmap_ptr_req req;
2222         int ncmax, ret;
2223
2224         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2225         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2226                                        ncmax);
2227         req.bpr_ptr = ptr;
2228         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2229         if (ret < 0)
2230                 return ret;
2231         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2232
2233         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2234         /* on-disk format */
2235         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2236         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2237
2238         return 0;
2239 }
2240
2241 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2242                               struct buffer_head **bh,
2243                               sector_t blocknr,
2244                               union nilfs_binfo *binfo)
2245 {
2246         struct nilfs_btree_path *path;
2247         struct nilfs_btree_node *node;
2248         __u64 key;
2249         int level, ret;
2250
2251         path = nilfs_btree_alloc_path();
2252         if (path == NULL)
2253                 return -ENOMEM;
2254
2255         if (buffer_nilfs_node(*bh)) {
2256                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2257                 key = nilfs_btree_node_get_key(node, 0);
2258                 level = nilfs_btree_node_get_level(node);
2259         } else {
2260                 key = nilfs_bmap_data_get_key(btree, *bh);
2261                 level = NILFS_BTREE_LEVEL_DATA;
2262         }
2263
2264         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2265         if (ret < 0) {
2266                 WARN_ON(ret == -ENOENT);
2267                 goto out;
2268         }
2269
2270         ret = NILFS_BMAP_USE_VBN(btree) ?
2271                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2272                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2273
2274  out:
2275         nilfs_btree_free_path(path);
2276
2277         return ret;
2278 }
2279
2280 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2281                                  struct buffer_head **bh,
2282                                  sector_t blocknr,
2283                                  union nilfs_binfo *binfo)
2284 {
2285         struct nilfs_btree_node *node;
2286         __u64 key;
2287         int ret;
2288
2289         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2290                              blocknr);
2291         if (ret < 0)
2292                 return ret;
2293
2294         if (buffer_nilfs_node(*bh)) {
2295                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2296                 key = nilfs_btree_node_get_key(node, 0);
2297         } else
2298                 key = nilfs_bmap_data_get_key(btree, *bh);
2299
2300         /* on-disk format */
2301         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2302         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2303
2304         return 0;
2305 }
2306
2307 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2308 {
2309         struct buffer_head *bh;
2310         struct nilfs_btree_path *path;
2311         __u64 ptr;
2312         int ret;
2313
2314         path = nilfs_btree_alloc_path();
2315         if (path == NULL)
2316                 return -ENOMEM;
2317
2318         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2319         if (ret < 0) {
2320                 WARN_ON(ret == -ENOENT);
2321                 goto out;
2322         }
2323         ret = nilfs_btree_get_block(btree, ptr, &bh);
2324         if (ret < 0) {
2325                 WARN_ON(ret == -ENOENT);
2326                 goto out;
2327         }
2328
2329         if (!buffer_dirty(bh))
2330                 mark_buffer_dirty(bh);
2331         brelse(bh);
2332         if (!nilfs_bmap_dirty(btree))
2333                 nilfs_bmap_set_dirty(btree);
2334
2335  out:
2336         nilfs_btree_free_path(path);
2337         return ret;
2338 }
2339
2340 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2341         .bop_lookup             =       nilfs_btree_lookup,
2342         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2343         .bop_insert             =       nilfs_btree_insert,
2344         .bop_delete             =       nilfs_btree_delete,
2345         .bop_clear              =       NULL,
2346
2347         .bop_propagate          =       nilfs_btree_propagate,
2348
2349         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2350
2351         .bop_assign             =       nilfs_btree_assign,
2352         .bop_mark               =       nilfs_btree_mark,
2353
2354         .bop_seek_key           =       nilfs_btree_seek_key,
2355         .bop_last_key           =       nilfs_btree_last_key,
2356
2357         .bop_check_insert       =       NULL,
2358         .bop_check_delete       =       nilfs_btree_check_delete,
2359         .bop_gather_data        =       nilfs_btree_gather_data,
2360 };
2361
2362 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2363         .bop_lookup             =       NULL,
2364         .bop_lookup_contig      =       NULL,
2365         .bop_insert             =       NULL,
2366         .bop_delete             =       NULL,
2367         .bop_clear              =       NULL,
2368
2369         .bop_propagate          =       nilfs_btree_propagate_gc,
2370
2371         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2372
2373         .bop_assign             =       nilfs_btree_assign_gc,
2374         .bop_mark               =       NULL,
2375
2376         .bop_seek_key           =       NULL,
2377         .bop_last_key           =       NULL,
2378
2379         .bop_check_insert       =       NULL,
2380         .bop_check_delete       =       NULL,
2381         .bop_gather_data        =       NULL,
2382 };
2383
2384 static void __nilfs_btree_init(struct nilfs_bmap *bmap)
2385 {
2386         bmap->b_ops = &nilfs_btree_ops;
2387         bmap->b_nchildren_per_block =
2388                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2389 }
2390
2391 int nilfs_btree_init(struct nilfs_bmap *bmap)
2392 {
2393         int ret = 0;
2394
2395         __nilfs_btree_init(bmap);
2396
2397         if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
2398                                     bmap->b_inode->i_ino))
2399                 ret = -EIO;
2400         return ret;
2401 }
2402
2403 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2404 {
2405         bmap->b_ops = &nilfs_btree_ops_gc;
2406         bmap->b_nchildren_per_block =
2407                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2408 }