btrfs: unsplit printed strings
[cascardo/linux.git] / fs / btrfs / check-integrity.c
1 /*
2  * Copyright (C) STRATO AG 2011.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 /*
20  * This module can be used to catch cases when the btrfs kernel
21  * code executes write requests to the disk that leave the file
22  * system in an inconsistent state. In such a state, a power loss
23  * or kernel panic would cause the data on disk to be lost or at
24  * least damaged.
25  *
26  * Code is added that examines all block write requests during
27  * runtime (including writes of the super block). Three rules
28  * are verified and an error is printed on violation of the
29  * rules:
30  * 1. It is not allowed to write a disk block which is
31  *    currently referenced by the super block (either directly
32  *    or indirectly).
33  * 2. When a super block is written, it is verified that all
34  *    referenced (directly or indirectly) blocks fulfill the
35  *    following requirements:
36  *    2a. All referenced blocks have either been present when
37  *        the file system was mounted, (i.e., they have been
38  *        referenced by the super block) or they have been
39  *        written since then and the write completion callback
40  *        was called and no write error was indicated and a
41  *        FLUSH request to the device where these blocks are
42  *        located was received and completed.
43  *    2b. All referenced blocks need to have a generation
44  *        number which is equal to the parent's number.
45  *
46  * One issue that was found using this module was that the log
47  * tree on disk became temporarily corrupted because disk blocks
48  * that had been in use for the log tree had been freed and
49  * reused too early, while being referenced by the written super
50  * block.
51  *
52  * The search term in the kernel log that can be used to filter
53  * on the existence of detected integrity issues is
54  * "btrfs: attempt".
55  *
56  * The integrity check is enabled via mount options. These
57  * mount options are only supported if the integrity check
58  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
59  *
60  * Example #1, apply integrity checks to all metadata:
61  * mount /dev/sdb1 /mnt -o check_int
62  *
63  * Example #2, apply integrity checks to all metadata and
64  * to data extents:
65  * mount /dev/sdb1 /mnt -o check_int_data
66  *
67  * Example #3, apply integrity checks to all metadata and dump
68  * the tree that the super block references to kernel messages
69  * each time after a super block was written:
70  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
71  *
72  * If the integrity check tool is included and activated in
73  * the mount options, plenty of kernel memory is used, and
74  * plenty of additional CPU cycles are spent. Enabling this
75  * functionality is not intended for normal use. In most
76  * cases, unless you are a btrfs developer who needs to verify
77  * the integrity of (super)-block write requests, do not
78  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
79  * include and compile the integrity check tool.
80  *
81  * Expect millions of lines of information in the kernel log with an
82  * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the
83  * kernel config to at least 26 (which is 64MB). Usually the value is
84  * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be
85  * changed like this before LOG_BUF_SHIFT can be set to a high value:
86  * config LOG_BUF_SHIFT
87  *       int "Kernel log buffer size (16 => 64KB, 17 => 128KB)"
88  *       range 12 30
89  */
90
91 #include <linux/sched.h>
92 #include <linux/slab.h>
93 #include <linux/buffer_head.h>
94 #include <linux/mutex.h>
95 #include <linux/genhd.h>
96 #include <linux/blkdev.h>
97 #include <linux/vmalloc.h>
98 #include <linux/string.h>
99 #include "ctree.h"
100 #include "disk-io.h"
101 #include "hash.h"
102 #include "transaction.h"
103 #include "extent_io.h"
104 #include "volumes.h"
105 #include "print-tree.h"
106 #include "locking.h"
107 #include "check-integrity.h"
108 #include "rcu-string.h"
109 #include "compression.h"
110
/*
 * Number of buckets in each hashtable. The bucket index is computed by
 * masking with (size - 1), so every size has to stay a power of two.
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE		0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE	0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE	0x100

/* Debug markers stored in the magic field of the respective objects. */
#define BTRFSIC_BLOCK_MAGIC_NUMBER		0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER		0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER		0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER	20111300

/* Maximum indentation of a tree dump, in characters, excluding " [...]". */
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL	(200 - 6)

/* Generation value of a block whose generation is not known. */
#define BTRFSIC_GENERATION_UNKNOWN		((u64)-1)

/*
 * Bits of the print_mask. The mask is passed in with the mount option
 * check_int_print_mask; the individual bits may be ORed together.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE		0x00002000

140
141 struct btrfsic_dev_state;
142 struct btrfsic_state;
143
144 struct btrfsic_block {
145         u32 magic_num;          /* only used for debug purposes */
146         unsigned int is_metadata:1;     /* if it is meta-data, not data-data */
147         unsigned int is_superblock:1;   /* if it is one of the superblocks */
148         unsigned int is_iodone:1;       /* if is done by lower subsystem */
149         unsigned int iodone_w_error:1;  /* error was indicated to endio */
150         unsigned int never_written:1;   /* block was added because it was
151                                          * referenced, not because it was
152                                          * written */
153         unsigned int mirror_num;        /* large enough to hold
154                                          * BTRFS_SUPER_MIRROR_MAX */
155         struct btrfsic_dev_state *dev_state;
156         u64 dev_bytenr;         /* key, physical byte num on disk */
157         u64 logical_bytenr;     /* logical byte num on disk */
158         u64 generation;
159         struct btrfs_disk_key disk_key; /* extra info to print in case of
160                                          * issues, will not always be correct */
161         struct list_head collision_resolving_node;      /* list node */
162         struct list_head all_blocks_node;       /* list node */
163
164         /* the following two lists contain block_link items */
165         struct list_head ref_to_list;   /* list */
166         struct list_head ref_from_list; /* list */
167         struct btrfsic_block *next_in_same_bio;
168         void *orig_bio_bh_private;
169         union {
170                 bio_end_io_t *bio;
171                 bh_end_io_t *bh;
172         } orig_bio_bh_end_io;
173         int submit_bio_bh_rw;
174         u64 flush_gen; /* only valid if !never_written */
175 };
176
177 /*
178  * Elements of this type are allocated dynamically and required because
179  * each block object can refer to and can be ref from multiple blocks.
180  * The key to lookup them in the hashtable is the dev_bytenr of
181  * the block ref to plus the one from the block referred from.
182  * The fact that they are searchable via a hashtable and that a
183  * ref_cnt is maintained is not required for the btrfs integrity
184  * check algorithm itself, it is only used to make the output more
185  * beautiful in case that an error is detected (an error is defined
186  * as a write operation to a block while that block is still referenced).
187  */
188 struct btrfsic_block_link {
189         u32 magic_num;          /* only used for debug purposes */
190         u32 ref_cnt;
191         struct list_head node_ref_to;   /* list node */
192         struct list_head node_ref_from; /* list node */
193         struct list_head collision_resolving_node;      /* list node */
194         struct btrfsic_block *block_ref_to;
195         struct btrfsic_block *block_ref_from;
196         u64 parent_generation;
197 };
198
199 struct btrfsic_dev_state {
200         u32 magic_num;          /* only used for debug purposes */
201         struct block_device *bdev;
202         struct btrfsic_state *state;
203         struct list_head collision_resolving_node;      /* list node */
204         struct btrfsic_block dummy_block_for_bio_bh_flush;
205         u64 last_flush_gen;
206         char name[BDEVNAME_SIZE];
207 };
208
209 struct btrfsic_block_hashtable {
210         struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
211 };
212
213 struct btrfsic_block_link_hashtable {
214         struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
215 };
216
217 struct btrfsic_dev_state_hashtable {
218         struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
219 };
220
221 struct btrfsic_block_data_ctx {
222         u64 start;              /* virtual bytenr */
223         u64 dev_bytenr;         /* physical bytenr on device */
224         u32 len;
225         struct btrfsic_dev_state *dev;
226         char **datav;
227         struct page **pagev;
228         void *mem_to_free;
229 };
230
231 /* This structure is used to implement recursion without occupying
232  * any stack space, refer to btrfsic_process_metablock() */
233 struct btrfsic_stack_frame {
234         u32 magic;
235         u32 nr;
236         int error;
237         int i;
238         int limit_nesting;
239         int num_copies;
240         int mirror_num;
241         struct btrfsic_block *block;
242         struct btrfsic_block_data_ctx *block_ctx;
243         struct btrfsic_block *next_block;
244         struct btrfsic_block_data_ctx next_block_ctx;
245         struct btrfs_header *hdr;
246         struct btrfsic_stack_frame *prev;
247 };
248
249 /* Some state per mounted filesystem */
250 struct btrfsic_state {
251         u32 print_mask;
252         int include_extent_data;
253         int csum_size;
254         struct list_head all_blocks_list;
255         struct btrfsic_block_hashtable block_hashtable;
256         struct btrfsic_block_link_hashtable block_link_hashtable;
257         struct btrfs_root *root;
258         u64 max_superblock_generation;
259         struct btrfsic_block *latest_superblock;
260         u32 metablock_size;
261         u32 datablock_size;
262 };
263
264 static void btrfsic_block_init(struct btrfsic_block *b);
265 static struct btrfsic_block *btrfsic_block_alloc(void);
266 static void btrfsic_block_free(struct btrfsic_block *b);
267 static void btrfsic_block_link_init(struct btrfsic_block_link *n);
268 static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
269 static void btrfsic_block_link_free(struct btrfsic_block_link *n);
270 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
271 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
272 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
273 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
274 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
275                                         struct btrfsic_block_hashtable *h);
276 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
277 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
278                 struct block_device *bdev,
279                 u64 dev_bytenr,
280                 struct btrfsic_block_hashtable *h);
281 static void btrfsic_block_link_hashtable_init(
282                 struct btrfsic_block_link_hashtable *h);
283 static void btrfsic_block_link_hashtable_add(
284                 struct btrfsic_block_link *l,
285                 struct btrfsic_block_link_hashtable *h);
286 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
287 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
288                 struct block_device *bdev_ref_to,
289                 u64 dev_bytenr_ref_to,
290                 struct block_device *bdev_ref_from,
291                 u64 dev_bytenr_ref_from,
292                 struct btrfsic_block_link_hashtable *h);
293 static void btrfsic_dev_state_hashtable_init(
294                 struct btrfsic_dev_state_hashtable *h);
295 static void btrfsic_dev_state_hashtable_add(
296                 struct btrfsic_dev_state *ds,
297                 struct btrfsic_dev_state_hashtable *h);
298 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
299 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
300                 struct block_device *bdev,
301                 struct btrfsic_dev_state_hashtable *h);
302 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
303 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
304 static int btrfsic_process_superblock(struct btrfsic_state *state,
305                                       struct btrfs_fs_devices *fs_devices);
306 static int btrfsic_process_metablock(struct btrfsic_state *state,
307                                      struct btrfsic_block *block,
308                                      struct btrfsic_block_data_ctx *block_ctx,
309                                      int limit_nesting, int force_iodone_flag);
310 static void btrfsic_read_from_block_data(
311         struct btrfsic_block_data_ctx *block_ctx,
312         void *dst, u32 offset, size_t len);
313 static int btrfsic_create_link_to_next_block(
314                 struct btrfsic_state *state,
315                 struct btrfsic_block *block,
316                 struct btrfsic_block_data_ctx
317                 *block_ctx, u64 next_bytenr,
318                 int limit_nesting,
319                 struct btrfsic_block_data_ctx *next_block_ctx,
320                 struct btrfsic_block **next_blockp,
321                 int force_iodone_flag,
322                 int *num_copiesp, int *mirror_nump,
323                 struct btrfs_disk_key *disk_key,
324                 u64 parent_generation);
325 static int btrfsic_handle_extent_data(struct btrfsic_state *state,
326                                       struct btrfsic_block *block,
327                                       struct btrfsic_block_data_ctx *block_ctx,
328                                       u32 item_offset, int force_iodone_flag);
329 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
330                              struct btrfsic_block_data_ctx *block_ctx_out,
331                              int mirror_num);
332 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
333 static int btrfsic_read_block(struct btrfsic_state *state,
334                               struct btrfsic_block_data_ctx *block_ctx);
335 static void btrfsic_dump_database(struct btrfsic_state *state);
336 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
337                                      char **datav, unsigned int num_pages);
338 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
339                                           u64 dev_bytenr, char **mapped_datav,
340                                           unsigned int num_pages,
341                                           struct bio *bio, int *bio_is_patched,
342                                           struct buffer_head *bh,
343                                           int submit_bio_bh_rw);
344 static int btrfsic_process_written_superblock(
345                 struct btrfsic_state *state,
346                 struct btrfsic_block *const block,
347                 struct btrfs_super_block *const super_hdr);
348 static void btrfsic_bio_end_io(struct bio *bp);
349 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
350 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
351                                               const struct btrfsic_block *block,
352                                               int recursion_level);
353 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
354                                         struct btrfsic_block *const block,
355                                         int recursion_level);
356 static void btrfsic_print_add_link(const struct btrfsic_state *state,
357                                    const struct btrfsic_block_link *l);
358 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
359                                    const struct btrfsic_block_link *l);
360 static char btrfsic_get_block_type(const struct btrfsic_state *state,
361                                    const struct btrfsic_block *block);
362 static void btrfsic_dump_tree(const struct btrfsic_state *state);
363 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
364                                   const struct btrfsic_block *block,
365                                   int indent_level);
366 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
367                 struct btrfsic_state *state,
368                 struct btrfsic_block_data_ctx *next_block_ctx,
369                 struct btrfsic_block *next_block,
370                 struct btrfsic_block *from_block,
371                 u64 parent_generation);
372 static struct btrfsic_block *btrfsic_block_lookup_or_add(
373                 struct btrfsic_state *state,
374                 struct btrfsic_block_data_ctx *block_ctx,
375                 const char *additional_string,
376                 int is_metadata,
377                 int is_iodone,
378                 int never_written,
379                 int mirror_num,
380                 int *was_created);
381 static int btrfsic_process_superblock_dev_mirror(
382                 struct btrfsic_state *state,
383                 struct btrfsic_dev_state *dev_state,
384                 struct btrfs_device *device,
385                 int superblock_mirror_num,
386                 struct btrfsic_dev_state **selected_dev_state,
387                 struct btrfs_super_block *selected_super);
388 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
389                 struct block_device *bdev);
390 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
391                                            u64 bytenr,
392                                            struct btrfsic_dev_state *dev_state,
393                                            u64 dev_bytenr);
394
395 static struct mutex btrfsic_mutex;
396 static int btrfsic_is_initialized;
397 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
398
399
400 static void btrfsic_block_init(struct btrfsic_block *b)
401 {
402         b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
403         b->dev_state = NULL;
404         b->dev_bytenr = 0;
405         b->logical_bytenr = 0;
406         b->generation = BTRFSIC_GENERATION_UNKNOWN;
407         b->disk_key.objectid = 0;
408         b->disk_key.type = 0;
409         b->disk_key.offset = 0;
410         b->is_metadata = 0;
411         b->is_superblock = 0;
412         b->is_iodone = 0;
413         b->iodone_w_error = 0;
414         b->never_written = 0;
415         b->mirror_num = 0;
416         b->next_in_same_bio = NULL;
417         b->orig_bio_bh_private = NULL;
418         b->orig_bio_bh_end_io.bio = NULL;
419         INIT_LIST_HEAD(&b->collision_resolving_node);
420         INIT_LIST_HEAD(&b->all_blocks_node);
421         INIT_LIST_HEAD(&b->ref_to_list);
422         INIT_LIST_HEAD(&b->ref_from_list);
423         b->submit_bio_bh_rw = 0;
424         b->flush_gen = 0;
425 }
426
427 static struct btrfsic_block *btrfsic_block_alloc(void)
428 {
429         struct btrfsic_block *b;
430
431         b = kzalloc(sizeof(*b), GFP_NOFS);
432         if (NULL != b)
433                 btrfsic_block_init(b);
434
435         return b;
436 }
437
438 static void btrfsic_block_free(struct btrfsic_block *b)
439 {
440         BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
441         kfree(b);
442 }
443
444 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
445 {
446         l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
447         l->ref_cnt = 1;
448         INIT_LIST_HEAD(&l->node_ref_to);
449         INIT_LIST_HEAD(&l->node_ref_from);
450         INIT_LIST_HEAD(&l->collision_resolving_node);
451         l->block_ref_to = NULL;
452         l->block_ref_from = NULL;
453 }
454
455 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
456 {
457         struct btrfsic_block_link *l;
458
459         l = kzalloc(sizeof(*l), GFP_NOFS);
460         if (NULL != l)
461                 btrfsic_block_link_init(l);
462
463         return l;
464 }
465
466 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
467 {
468         BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
469         kfree(l);
470 }
471
472 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
473 {
474         ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
475         ds->bdev = NULL;
476         ds->state = NULL;
477         ds->name[0] = '\0';
478         INIT_LIST_HEAD(&ds->collision_resolving_node);
479         ds->last_flush_gen = 0;
480         btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
481         ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
482         ds->dummy_block_for_bio_bh_flush.dev_state = ds;
483 }
484
485 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
486 {
487         struct btrfsic_dev_state *ds;
488
489         ds = kzalloc(sizeof(*ds), GFP_NOFS);
490         if (NULL != ds)
491                 btrfsic_dev_state_init(ds);
492
493         return ds;
494 }
495
496 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
497 {
498         BUG_ON(!(NULL == ds ||
499                  BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
500         kfree(ds);
501 }
502
503 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
504 {
505         int i;
506
507         for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
508                 INIT_LIST_HEAD(h->table + i);
509 }
510
511 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
512                                         struct btrfsic_block_hashtable *h)
513 {
514         const unsigned int hashval =
515             (((unsigned int)(b->dev_bytenr >> 16)) ^
516              ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
517              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
518
519         list_add(&b->collision_resolving_node, h->table + hashval);
520 }
521
522 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
523 {
524         list_del(&b->collision_resolving_node);
525 }
526
527 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
528                 struct block_device *bdev,
529                 u64 dev_bytenr,
530                 struct btrfsic_block_hashtable *h)
531 {
532         const unsigned int hashval =
533             (((unsigned int)(dev_bytenr >> 16)) ^
534              ((unsigned int)((uintptr_t)bdev))) &
535              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
536         struct btrfsic_block *b;
537
538         list_for_each_entry(b, h->table + hashval, collision_resolving_node) {
539                 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
540                         return b;
541         }
542
543         return NULL;
544 }
545
546 static void btrfsic_block_link_hashtable_init(
547                 struct btrfsic_block_link_hashtable *h)
548 {
549         int i;
550
551         for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
552                 INIT_LIST_HEAD(h->table + i);
553 }
554
555 static void btrfsic_block_link_hashtable_add(
556                 struct btrfsic_block_link *l,
557                 struct btrfsic_block_link_hashtable *h)
558 {
559         const unsigned int hashval =
560             (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
561              ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
562              ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
563              ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
564              & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
565
566         BUG_ON(NULL == l->block_ref_to);
567         BUG_ON(NULL == l->block_ref_from);
568         list_add(&l->collision_resolving_node, h->table + hashval);
569 }
570
571 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
572 {
573         list_del(&l->collision_resolving_node);
574 }
575
576 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
577                 struct block_device *bdev_ref_to,
578                 u64 dev_bytenr_ref_to,
579                 struct block_device *bdev_ref_from,
580                 u64 dev_bytenr_ref_from,
581                 struct btrfsic_block_link_hashtable *h)
582 {
583         const unsigned int hashval =
584             (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
585              ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
586              ((unsigned int)((uintptr_t)bdev_ref_to)) ^
587              ((unsigned int)((uintptr_t)bdev_ref_from))) &
588              (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
589         struct btrfsic_block_link *l;
590
591         list_for_each_entry(l, h->table + hashval, collision_resolving_node) {
592                 BUG_ON(NULL == l->block_ref_to);
593                 BUG_ON(NULL == l->block_ref_from);
594                 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
595                     l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
596                     l->block_ref_from->dev_state->bdev == bdev_ref_from &&
597                     l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
598                         return l;
599         }
600
601         return NULL;
602 }
603
604 static void btrfsic_dev_state_hashtable_init(
605                 struct btrfsic_dev_state_hashtable *h)
606 {
607         int i;
608
609         for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
610                 INIT_LIST_HEAD(h->table + i);
611 }
612
613 static void btrfsic_dev_state_hashtable_add(
614                 struct btrfsic_dev_state *ds,
615                 struct btrfsic_dev_state_hashtable *h)
616 {
617         const unsigned int hashval =
618             (((unsigned int)((uintptr_t)ds->bdev)) &
619              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
620
621         list_add(&ds->collision_resolving_node, h->table + hashval);
622 }
623
624 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
625 {
626         list_del(&ds->collision_resolving_node);
627 }
628
629 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
630                 struct block_device *bdev,
631                 struct btrfsic_dev_state_hashtable *h)
632 {
633         const unsigned int hashval =
634             (((unsigned int)((uintptr_t)bdev)) &
635              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
636         struct btrfsic_dev_state *ds;
637
638         list_for_each_entry(ds, h->table + hashval, collision_resolving_node) {
639                 if (ds->bdev == bdev)
640                         return ds;
641         }
642
643         return NULL;
644 }
645
646 static int btrfsic_process_superblock(struct btrfsic_state *state,
647                                       struct btrfs_fs_devices *fs_devices)
648 {
649         int ret = 0;
650         struct btrfs_super_block *selected_super;
651         struct list_head *dev_head = &fs_devices->devices;
652         struct btrfs_device *device;
653         struct btrfsic_dev_state *selected_dev_state = NULL;
654         int pass;
655
656         BUG_ON(NULL == state);
657         selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
658         if (NULL == selected_super) {
659                 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
660                 return -ENOMEM;
661         }
662
663         list_for_each_entry(device, dev_head, dev_list) {
664                 int i;
665                 struct btrfsic_dev_state *dev_state;
666
667                 if (!device->bdev || !device->name)
668                         continue;
669
670                 dev_state = btrfsic_dev_state_lookup(device->bdev);
671                 BUG_ON(NULL == dev_state);
672                 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
673                         ret = btrfsic_process_superblock_dev_mirror(
674                                         state, dev_state, device, i,
675                                         &selected_dev_state, selected_super);
676                         if (0 != ret && 0 == i) {
677                                 kfree(selected_super);
678                                 return ret;
679                         }
680                 }
681         }
682
683         if (NULL == state->latest_superblock) {
684                 printk(KERN_INFO "btrfsic: no superblock found!\n");
685                 kfree(selected_super);
686                 return -1;
687         }
688
689         state->csum_size = btrfs_super_csum_size(selected_super);
690
691         for (pass = 0; pass < 3; pass++) {
692                 int num_copies;
693                 int mirror_num;
694                 u64 next_bytenr;
695
696                 switch (pass) {
697                 case 0:
698                         next_bytenr = btrfs_super_root(selected_super);
699                         if (state->print_mask &
700                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
701                                 printk(KERN_INFO "root@%llu\n", next_bytenr);
702                         break;
703                 case 1:
704                         next_bytenr = btrfs_super_chunk_root(selected_super);
705                         if (state->print_mask &
706                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
707                                 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
708                         break;
709                 case 2:
710                         next_bytenr = btrfs_super_log_root(selected_super);
711                         if (0 == next_bytenr)
712                                 continue;
713                         if (state->print_mask &
714                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
715                                 printk(KERN_INFO "log@%llu\n", next_bytenr);
716                         break;
717                 }
718
719                 num_copies =
720                     btrfs_num_copies(state->root->fs_info,
721                                      next_bytenr, state->metablock_size);
722                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
723                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
724                                next_bytenr, num_copies);
725
726                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
727                         struct btrfsic_block *next_block;
728                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
729                         struct btrfsic_block_link *l;
730
731                         ret = btrfsic_map_block(state, next_bytenr,
732                                                 state->metablock_size,
733                                                 &tmp_next_block_ctx,
734                                                 mirror_num);
735                         if (ret) {
736                                 printk(KERN_INFO "btrfsic: btrfsic_map_block(root @%llu, mirror %d) failed!\n",
737                                        next_bytenr, mirror_num);
738                                 kfree(selected_super);
739                                 return -1;
740                         }
741
742                         next_block = btrfsic_block_hashtable_lookup(
743                                         tmp_next_block_ctx.dev->bdev,
744                                         tmp_next_block_ctx.dev_bytenr,
745                                         &state->block_hashtable);
746                         BUG_ON(NULL == next_block);
747
748                         l = btrfsic_block_link_hashtable_lookup(
749                                         tmp_next_block_ctx.dev->bdev,
750                                         tmp_next_block_ctx.dev_bytenr,
751                                         state->latest_superblock->dev_state->
752                                         bdev,
753                                         state->latest_superblock->dev_bytenr,
754                                         &state->block_link_hashtable);
755                         BUG_ON(NULL == l);
756
757                         ret = btrfsic_read_block(state, &tmp_next_block_ctx);
758                         if (ret < (int)PAGE_SIZE) {
759                                 printk(KERN_INFO
760                                        "btrfsic: read @logical %llu failed!\n",
761                                        tmp_next_block_ctx.start);
762                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
763                                 kfree(selected_super);
764                                 return -1;
765                         }
766
767                         ret = btrfsic_process_metablock(state,
768                                                         next_block,
769                                                         &tmp_next_block_ctx,
770                                                         BTRFS_MAX_LEVEL + 3, 1);
771                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
772                 }
773         }
774
775         kfree(selected_super);
776         return ret;
777 }
778
779 static int btrfsic_process_superblock_dev_mirror(
780                 struct btrfsic_state *state,
781                 struct btrfsic_dev_state *dev_state,
782                 struct btrfs_device *device,
783                 int superblock_mirror_num,
784                 struct btrfsic_dev_state **selected_dev_state,
785                 struct btrfs_super_block *selected_super)
786 {
787         struct btrfs_super_block *super_tmp;
788         u64 dev_bytenr;
789         struct buffer_head *bh;
790         struct btrfsic_block *superblock_tmp;
791         int pass;
792         struct block_device *const superblock_bdev = device->bdev;
793
794         /* super block bytenr is always the unmapped device bytenr */
795         dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
796         if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
797                 return -1;
798         bh = __bread(superblock_bdev, dev_bytenr / 4096,
799                      BTRFS_SUPER_INFO_SIZE);
800         if (NULL == bh)
801                 return -1;
802         super_tmp = (struct btrfs_super_block *)
803             (bh->b_data + (dev_bytenr & 4095));
804
805         if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
806             btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
807             memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
808             btrfs_super_nodesize(super_tmp) != state->metablock_size ||
809             btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
810                 brelse(bh);
811                 return 0;
812         }
813
814         superblock_tmp =
815             btrfsic_block_hashtable_lookup(superblock_bdev,
816                                            dev_bytenr,
817                                            &state->block_hashtable);
818         if (NULL == superblock_tmp) {
819                 superblock_tmp = btrfsic_block_alloc();
820                 if (NULL == superblock_tmp) {
821                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
822                         brelse(bh);
823                         return -1;
824                 }
825                 /* for superblock, only the dev_bytenr makes sense */
826                 superblock_tmp->dev_bytenr = dev_bytenr;
827                 superblock_tmp->dev_state = dev_state;
828                 superblock_tmp->logical_bytenr = dev_bytenr;
829                 superblock_tmp->generation = btrfs_super_generation(super_tmp);
830                 superblock_tmp->is_metadata = 1;
831                 superblock_tmp->is_superblock = 1;
832                 superblock_tmp->is_iodone = 1;
833                 superblock_tmp->never_written = 0;
834                 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
835                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
836                         btrfs_info_in_rcu(device->dev_root->fs_info,
837                                 "new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)",
838                                      superblock_bdev,
839                                      rcu_str_deref(device->name), dev_bytenr,
840                                      dev_state->name, dev_bytenr,
841                                      superblock_mirror_num);
842                 list_add(&superblock_tmp->all_blocks_node,
843                          &state->all_blocks_list);
844                 btrfsic_block_hashtable_add(superblock_tmp,
845                                             &state->block_hashtable);
846         }
847
848         /* select the one with the highest generation field */
849         if (btrfs_super_generation(super_tmp) >
850             state->max_superblock_generation ||
851             0 == state->max_superblock_generation) {
852                 memcpy(selected_super, super_tmp, sizeof(*selected_super));
853                 *selected_dev_state = dev_state;
854                 state->max_superblock_generation =
855                     btrfs_super_generation(super_tmp);
856                 state->latest_superblock = superblock_tmp;
857         }
858
859         for (pass = 0; pass < 3; pass++) {
860                 u64 next_bytenr;
861                 int num_copies;
862                 int mirror_num;
863                 const char *additional_string = NULL;
864                 struct btrfs_disk_key tmp_disk_key;
865
866                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
867                 tmp_disk_key.offset = 0;
868                 switch (pass) {
869                 case 0:
870                         btrfs_set_disk_key_objectid(&tmp_disk_key,
871                                                     BTRFS_ROOT_TREE_OBJECTID);
872                         additional_string = "initial root ";
873                         next_bytenr = btrfs_super_root(super_tmp);
874                         break;
875                 case 1:
876                         btrfs_set_disk_key_objectid(&tmp_disk_key,
877                                                     BTRFS_CHUNK_TREE_OBJECTID);
878                         additional_string = "initial chunk ";
879                         next_bytenr = btrfs_super_chunk_root(super_tmp);
880                         break;
881                 case 2:
882                         btrfs_set_disk_key_objectid(&tmp_disk_key,
883                                                     BTRFS_TREE_LOG_OBJECTID);
884                         additional_string = "initial log ";
885                         next_bytenr = btrfs_super_log_root(super_tmp);
886                         if (0 == next_bytenr)
887                                 continue;
888                         break;
889                 }
890
891                 num_copies =
892                     btrfs_num_copies(state->root->fs_info,
893                                      next_bytenr, state->metablock_size);
894                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
895                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
896                                next_bytenr, num_copies);
897                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
898                         struct btrfsic_block *next_block;
899                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
900                         struct btrfsic_block_link *l;
901
902                         if (btrfsic_map_block(state, next_bytenr,
903                                               state->metablock_size,
904                                               &tmp_next_block_ctx,
905                                               mirror_num)) {
906                                 printk(KERN_INFO "btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n",
907                                        next_bytenr, mirror_num);
908                                 brelse(bh);
909                                 return -1;
910                         }
911
912                         next_block = btrfsic_block_lookup_or_add(
913                                         state, &tmp_next_block_ctx,
914                                         additional_string, 1, 1, 0,
915                                         mirror_num, NULL);
916                         if (NULL == next_block) {
917                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
918                                 brelse(bh);
919                                 return -1;
920                         }
921
922                         next_block->disk_key = tmp_disk_key;
923                         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
924                         l = btrfsic_block_link_lookup_or_add(
925                                         state, &tmp_next_block_ctx,
926                                         next_block, superblock_tmp,
927                                         BTRFSIC_GENERATION_UNKNOWN);
928                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
929                         if (NULL == l) {
930                                 brelse(bh);
931                                 return -1;
932                         }
933                 }
934         }
935         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
936                 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
937
938         brelse(bh);
939         return 0;
940 }
941
942 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
943 {
944         struct btrfsic_stack_frame *sf;
945
946         sf = kzalloc(sizeof(*sf), GFP_NOFS);
947         if (NULL == sf)
948                 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
949         else
950                 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
951         return sf;
952 }
953
954 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
955 {
956         BUG_ON(!(NULL == sf ||
957                  BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
958         kfree(sf);
959 }
960
961 static int btrfsic_process_metablock(
962                 struct btrfsic_state *state,
963                 struct btrfsic_block *const first_block,
964                 struct btrfsic_block_data_ctx *const first_block_ctx,
965                 int first_limit_nesting, int force_iodone_flag)
966 {
967         struct btrfsic_stack_frame initial_stack_frame = { 0 };
968         struct btrfsic_stack_frame *sf;
969         struct btrfsic_stack_frame *next_stack;
970         struct btrfs_header *const first_hdr =
971                 (struct btrfs_header *)first_block_ctx->datav[0];
972
973         BUG_ON(!first_hdr);
974         sf = &initial_stack_frame;
975         sf->error = 0;
976         sf->i = -1;
977         sf->limit_nesting = first_limit_nesting;
978         sf->block = first_block;
979         sf->block_ctx = first_block_ctx;
980         sf->next_block = NULL;
981         sf->hdr = first_hdr;
982         sf->prev = NULL;
983
984 continue_with_new_stack_frame:
985         sf->block->generation = le64_to_cpu(sf->hdr->generation);
986         if (0 == sf->hdr->level) {
987                 struct btrfs_leaf *const leafhdr =
988                     (struct btrfs_leaf *)sf->hdr;
989
990                 if (-1 == sf->i) {
991                         sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
992
993                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
994                                 printk(KERN_INFO
995                                        "leaf %llu items %d generation %llu owner %llu\n",
996                                        sf->block_ctx->start, sf->nr,
997                                        btrfs_stack_header_generation(
998                                                &leafhdr->header),
999                                        btrfs_stack_header_owner(
1000                                                &leafhdr->header));
1001                 }
1002
1003 continue_with_current_leaf_stack_frame:
1004                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1005                         sf->i++;
1006                         sf->num_copies = 0;
1007                 }
1008
1009                 if (sf->i < sf->nr) {
1010                         struct btrfs_item disk_item;
1011                         u32 disk_item_offset =
1012                                 (uintptr_t)(leafhdr->items + sf->i) -
1013                                 (uintptr_t)leafhdr;
1014                         struct btrfs_disk_key *disk_key;
1015                         u8 type;
1016                         u32 item_offset;
1017                         u32 item_size;
1018
1019                         if (disk_item_offset + sizeof(struct btrfs_item) >
1020                             sf->block_ctx->len) {
1021 leaf_item_out_of_bounce_error:
1022                                 printk(KERN_INFO
1023                                        "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1024                                        sf->block_ctx->start,
1025                                        sf->block_ctx->dev->name);
1026                                 goto one_stack_frame_backwards;
1027                         }
1028                         btrfsic_read_from_block_data(sf->block_ctx,
1029                                                      &disk_item,
1030                                                      disk_item_offset,
1031                                                      sizeof(struct btrfs_item));
1032                         item_offset = btrfs_stack_item_offset(&disk_item);
1033                         item_size = btrfs_stack_item_size(&disk_item);
1034                         disk_key = &disk_item.key;
1035                         type = btrfs_disk_key_type(disk_key);
1036
1037                         if (BTRFS_ROOT_ITEM_KEY == type) {
1038                                 struct btrfs_root_item root_item;
1039                                 u32 root_item_offset;
1040                                 u64 next_bytenr;
1041
1042                                 root_item_offset = item_offset +
1043                                         offsetof(struct btrfs_leaf, items);
1044                                 if (root_item_offset + item_size >
1045                                     sf->block_ctx->len)
1046                                         goto leaf_item_out_of_bounce_error;
1047                                 btrfsic_read_from_block_data(
1048                                         sf->block_ctx, &root_item,
1049                                         root_item_offset,
1050                                         item_size);
1051                                 next_bytenr = btrfs_root_bytenr(&root_item);
1052
1053                                 sf->error =
1054                                     btrfsic_create_link_to_next_block(
1055                                                 state,
1056                                                 sf->block,
1057                                                 sf->block_ctx,
1058                                                 next_bytenr,
1059                                                 sf->limit_nesting,
1060                                                 &sf->next_block_ctx,
1061                                                 &sf->next_block,
1062                                                 force_iodone_flag,
1063                                                 &sf->num_copies,
1064                                                 &sf->mirror_num,
1065                                                 disk_key,
1066                                                 btrfs_root_generation(
1067                                                 &root_item));
1068                                 if (sf->error)
1069                                         goto one_stack_frame_backwards;
1070
1071                                 if (NULL != sf->next_block) {
1072                                         struct btrfs_header *const next_hdr =
1073                                             (struct btrfs_header *)
1074                                             sf->next_block_ctx.datav[0];
1075
1076                                         next_stack =
1077                                             btrfsic_stack_frame_alloc();
1078                                         if (NULL == next_stack) {
1079                                                 sf->error = -1;
1080                                                 btrfsic_release_block_ctx(
1081                                                                 &sf->
1082                                                                 next_block_ctx);
1083                                                 goto one_stack_frame_backwards;
1084                                         }
1085
1086                                         next_stack->i = -1;
1087                                         next_stack->block = sf->next_block;
1088                                         next_stack->block_ctx =
1089                                             &sf->next_block_ctx;
1090                                         next_stack->next_block = NULL;
1091                                         next_stack->hdr = next_hdr;
1092                                         next_stack->limit_nesting =
1093                                             sf->limit_nesting - 1;
1094                                         next_stack->prev = sf;
1095                                         sf = next_stack;
1096                                         goto continue_with_new_stack_frame;
1097                                 }
1098                         } else if (BTRFS_EXTENT_DATA_KEY == type &&
1099                                    state->include_extent_data) {
1100                                 sf->error = btrfsic_handle_extent_data(
1101                                                 state,
1102                                                 sf->block,
1103                                                 sf->block_ctx,
1104                                                 item_offset,
1105                                                 force_iodone_flag);
1106                                 if (sf->error)
1107                                         goto one_stack_frame_backwards;
1108                         }
1109
1110                         goto continue_with_current_leaf_stack_frame;
1111                 }
1112         } else {
1113                 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1114
1115                 if (-1 == sf->i) {
1116                         sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
1117
1118                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1119                                 printk(KERN_INFO "node %llu level %d items %d generation %llu owner %llu\n",
1120                                        sf->block_ctx->start,
1121                                        nodehdr->header.level, sf->nr,
1122                                        btrfs_stack_header_generation(
1123                                        &nodehdr->header),
1124                                        btrfs_stack_header_owner(
1125                                        &nodehdr->header));
1126                 }
1127
1128 continue_with_current_node_stack_frame:
1129                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1130                         sf->i++;
1131                         sf->num_copies = 0;
1132                 }
1133
1134                 if (sf->i < sf->nr) {
1135                         struct btrfs_key_ptr key_ptr;
1136                         u32 key_ptr_offset;
1137                         u64 next_bytenr;
1138
1139                         key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1140                                           (uintptr_t)nodehdr;
1141                         if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1142                             sf->block_ctx->len) {
1143                                 printk(KERN_INFO
1144                                        "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1145                                        sf->block_ctx->start,
1146                                        sf->block_ctx->dev->name);
1147                                 goto one_stack_frame_backwards;
1148                         }
1149                         btrfsic_read_from_block_data(
1150                                 sf->block_ctx, &key_ptr, key_ptr_offset,
1151                                 sizeof(struct btrfs_key_ptr));
1152                         next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
1153
1154                         sf->error = btrfsic_create_link_to_next_block(
1155                                         state,
1156                                         sf->block,
1157                                         sf->block_ctx,
1158                                         next_bytenr,
1159                                         sf->limit_nesting,
1160                                         &sf->next_block_ctx,
1161                                         &sf->next_block,
1162                                         force_iodone_flag,
1163                                         &sf->num_copies,
1164                                         &sf->mirror_num,
1165                                         &key_ptr.key,
1166                                         btrfs_stack_key_generation(&key_ptr));
1167                         if (sf->error)
1168                                 goto one_stack_frame_backwards;
1169
1170                         if (NULL != sf->next_block) {
1171                                 struct btrfs_header *const next_hdr =
1172                                     (struct btrfs_header *)
1173                                     sf->next_block_ctx.datav[0];
1174
1175                                 next_stack = btrfsic_stack_frame_alloc();
1176                                 if (NULL == next_stack) {
1177                                         sf->error = -1;
1178                                         goto one_stack_frame_backwards;
1179                                 }
1180
1181                                 next_stack->i = -1;
1182                                 next_stack->block = sf->next_block;
1183                                 next_stack->block_ctx = &sf->next_block_ctx;
1184                                 next_stack->next_block = NULL;
1185                                 next_stack->hdr = next_hdr;
1186                                 next_stack->limit_nesting =
1187                                     sf->limit_nesting - 1;
1188                                 next_stack->prev = sf;
1189                                 sf = next_stack;
1190                                 goto continue_with_new_stack_frame;
1191                         }
1192
1193                         goto continue_with_current_node_stack_frame;
1194                 }
1195         }
1196
1197 one_stack_frame_backwards:
1198         if (NULL != sf->prev) {
1199                 struct btrfsic_stack_frame *const prev = sf->prev;
1200
1201                 /* the one for the initial block is freed in the caller */
1202                 btrfsic_release_block_ctx(sf->block_ctx);
1203
1204                 if (sf->error) {
1205                         prev->error = sf->error;
1206                         btrfsic_stack_frame_free(sf);
1207                         sf = prev;
1208                         goto one_stack_frame_backwards;
1209                 }
1210
1211                 btrfsic_stack_frame_free(sf);
1212                 sf = prev;
1213                 goto continue_with_new_stack_frame;
1214         } else {
1215                 BUG_ON(&initial_stack_frame != sf);
1216         }
1217
1218         return sf->error;
1219 }
1220
1221 static void btrfsic_read_from_block_data(
1222         struct btrfsic_block_data_ctx *block_ctx,
1223         void *dstv, u32 offset, size_t len)
1224 {
1225         size_t cur;
1226         size_t offset_in_page;
1227         char *kaddr;
1228         char *dst = (char *)dstv;
1229         size_t start_offset = block_ctx->start & ((u64)PAGE_SIZE - 1);
1230         unsigned long i = (start_offset + offset) >> PAGE_SHIFT;
1231
1232         WARN_ON(offset + len > block_ctx->len);
1233         offset_in_page = (start_offset + offset) & (PAGE_SIZE - 1);
1234
1235         while (len > 0) {
1236                 cur = min(len, ((size_t)PAGE_SIZE - offset_in_page));
1237                 BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE));
1238                 kaddr = block_ctx->datav[i];
1239                 memcpy(dst, kaddr + offset_in_page, cur);
1240
1241                 dst += cur;
1242                 len -= cur;
1243                 offset_in_page = 0;
1244                 i++;
1245         }
1246 }
1247
1248 static int btrfsic_create_link_to_next_block(
1249                 struct btrfsic_state *state,
1250                 struct btrfsic_block *block,
1251                 struct btrfsic_block_data_ctx *block_ctx,
1252                 u64 next_bytenr,
1253                 int limit_nesting,
1254                 struct btrfsic_block_data_ctx *next_block_ctx,
1255                 struct btrfsic_block **next_blockp,
1256                 int force_iodone_flag,
1257                 int *num_copiesp, int *mirror_nump,
1258                 struct btrfs_disk_key *disk_key,
1259                 u64 parent_generation)
1260 {
1261         struct btrfsic_block *next_block = NULL;
1262         int ret;
1263         struct btrfsic_block_link *l;
1264         int did_alloc_block_link;
1265         int block_was_created;
1266
1267         *next_blockp = NULL;
1268         if (0 == *num_copiesp) {
1269                 *num_copiesp =
1270                     btrfs_num_copies(state->root->fs_info,
1271                                      next_bytenr, state->metablock_size);
1272                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1273                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1274                                next_bytenr, *num_copiesp);
1275                 *mirror_nump = 1;
1276         }
1277
1278         if (*mirror_nump > *num_copiesp)
1279                 return 0;
1280
1281         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1282                 printk(KERN_INFO
1283                        "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1284                        *mirror_nump);
1285         ret = btrfsic_map_block(state, next_bytenr,
1286                                 state->metablock_size,
1287                                 next_block_ctx, *mirror_nump);
1288         if (ret) {
1289                 printk(KERN_INFO
1290                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1291                        next_bytenr, *mirror_nump);
1292                 btrfsic_release_block_ctx(next_block_ctx);
1293                 *next_blockp = NULL;
1294                 return -1;
1295         }
1296
1297         next_block = btrfsic_block_lookup_or_add(state,
1298                                                  next_block_ctx, "referenced ",
1299                                                  1, force_iodone_flag,
1300                                                  !force_iodone_flag,
1301                                                  *mirror_nump,
1302                                                  &block_was_created);
1303         if (NULL == next_block) {
1304                 btrfsic_release_block_ctx(next_block_ctx);
1305                 *next_blockp = NULL;
1306                 return -1;
1307         }
1308         if (block_was_created) {
1309                 l = NULL;
1310                 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1311         } else {
1312                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1313                         if (next_block->logical_bytenr != next_bytenr &&
1314                             !(!next_block->is_metadata &&
1315                               0 == next_block->logical_bytenr))
1316                                 printk(KERN_INFO
1317                                        "Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1318                                        next_bytenr, next_block_ctx->dev->name,
1319                                        next_block_ctx->dev_bytenr, *mirror_nump,
1320                                        btrfsic_get_block_type(state,
1321                                                               next_block),
1322                                        next_block->logical_bytenr);
1323                         else
1324                                 printk(KERN_INFO
1325                                        "Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1326                                        next_bytenr, next_block_ctx->dev->name,
1327                                        next_block_ctx->dev_bytenr, *mirror_nump,
1328                                        btrfsic_get_block_type(state,
1329                                                               next_block));
1330                 }
1331                 next_block->logical_bytenr = next_bytenr;
1332
1333                 next_block->mirror_num = *mirror_nump;
1334                 l = btrfsic_block_link_hashtable_lookup(
1335                                 next_block_ctx->dev->bdev,
1336                                 next_block_ctx->dev_bytenr,
1337                                 block_ctx->dev->bdev,
1338                                 block_ctx->dev_bytenr,
1339                                 &state->block_link_hashtable);
1340         }
1341
1342         next_block->disk_key = *disk_key;
1343         if (NULL == l) {
1344                 l = btrfsic_block_link_alloc();
1345                 if (NULL == l) {
1346                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1347                         btrfsic_release_block_ctx(next_block_ctx);
1348                         *next_blockp = NULL;
1349                         return -1;
1350                 }
1351
1352                 did_alloc_block_link = 1;
1353                 l->block_ref_to = next_block;
1354                 l->block_ref_from = block;
1355                 l->ref_cnt = 1;
1356                 l->parent_generation = parent_generation;
1357
1358                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1359                         btrfsic_print_add_link(state, l);
1360
1361                 list_add(&l->node_ref_to, &block->ref_to_list);
1362                 list_add(&l->node_ref_from, &next_block->ref_from_list);
1363
1364                 btrfsic_block_link_hashtable_add(l,
1365                                                  &state->block_link_hashtable);
1366         } else {
1367                 did_alloc_block_link = 0;
1368                 if (0 == limit_nesting) {
1369                         l->ref_cnt++;
1370                         l->parent_generation = parent_generation;
1371                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1372                                 btrfsic_print_add_link(state, l);
1373                 }
1374         }
1375
1376         if (limit_nesting > 0 && did_alloc_block_link) {
1377                 ret = btrfsic_read_block(state, next_block_ctx);
1378                 if (ret < (int)next_block_ctx->len) {
1379                         printk(KERN_INFO
1380                                "btrfsic: read block @logical %llu failed!\n",
1381                                next_bytenr);
1382                         btrfsic_release_block_ctx(next_block_ctx);
1383                         *next_blockp = NULL;
1384                         return -1;
1385                 }
1386
1387                 *next_blockp = next_block;
1388         } else {
1389                 *next_blockp = NULL;
1390         }
1391         (*mirror_nump)++;
1392
1393         return 0;
1394 }
1395
1396 static int btrfsic_handle_extent_data(
1397                 struct btrfsic_state *state,
1398                 struct btrfsic_block *block,
1399                 struct btrfsic_block_data_ctx *block_ctx,
1400                 u32 item_offset, int force_iodone_flag)
1401 {
1402         int ret;
1403         struct btrfs_file_extent_item file_extent_item;
1404         u64 file_extent_item_offset;
1405         u64 next_bytenr;
1406         u64 num_bytes;
1407         u64 generation;
1408         struct btrfsic_block_link *l;
1409
1410         file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1411                                   item_offset;
1412         if (file_extent_item_offset +
1413             offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1414             block_ctx->len) {
1415                 printk(KERN_INFO
1416                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1417                        block_ctx->start, block_ctx->dev->name);
1418                 return -1;
1419         }
1420
1421         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1422                 file_extent_item_offset,
1423                 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1424         if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1425             btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
1426                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1427                         printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1428                                file_extent_item.type,
1429                                btrfs_stack_file_extent_disk_bytenr(
1430                                &file_extent_item));
1431                 return 0;
1432         }
1433
1434         if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1435             block_ctx->len) {
1436                 printk(KERN_INFO
1437                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1438                        block_ctx->start, block_ctx->dev->name);
1439                 return -1;
1440         }
1441         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1442                                      file_extent_item_offset,
1443                                      sizeof(struct btrfs_file_extent_item));
1444         next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item);
1445         if (btrfs_stack_file_extent_compression(&file_extent_item) ==
1446             BTRFS_COMPRESS_NONE) {
1447                 next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item);
1448                 num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
1449         } else {
1450                 num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item);
1451         }
1452         generation = btrfs_stack_file_extent_generation(&file_extent_item);
1453
1454         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1455                 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu, offset = %llu, num_bytes = %llu\n",
1456                        file_extent_item.type,
1457                        btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
1458                        btrfs_stack_file_extent_offset(&file_extent_item),
1459                        num_bytes);
1460         while (num_bytes > 0) {
1461                 u32 chunk_len;
1462                 int num_copies;
1463                 int mirror_num;
1464
1465                 if (num_bytes > state->datablock_size)
1466                         chunk_len = state->datablock_size;
1467                 else
1468                         chunk_len = num_bytes;
1469
1470                 num_copies =
1471                     btrfs_num_copies(state->root->fs_info,
1472                                      next_bytenr, state->datablock_size);
1473                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1474                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1475                                next_bytenr, num_copies);
1476                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1477                         struct btrfsic_block_data_ctx next_block_ctx;
1478                         struct btrfsic_block *next_block;
1479                         int block_was_created;
1480
1481                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1482                                 printk(KERN_INFO "btrfsic_handle_extent_data(mirror_num=%d)\n", mirror_num);
1483                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1484                                 printk(KERN_INFO
1485                                        "\tdisk_bytenr = %llu, num_bytes %u\n",
1486                                        next_bytenr, chunk_len);
1487                         ret = btrfsic_map_block(state, next_bytenr,
1488                                                 chunk_len, &next_block_ctx,
1489                                                 mirror_num);
1490                         if (ret) {
1491                                 printk(KERN_INFO
1492                                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1493                                        next_bytenr, mirror_num);
1494                                 return -1;
1495                         }
1496
1497                         next_block = btrfsic_block_lookup_or_add(
1498                                         state,
1499                                         &next_block_ctx,
1500                                         "referenced ",
1501                                         0,
1502                                         force_iodone_flag,
1503                                         !force_iodone_flag,
1504                                         mirror_num,
1505                                         &block_was_created);
1506                         if (NULL == next_block) {
1507                                 printk(KERN_INFO
1508                                        "btrfsic: error, kmalloc failed!\n");
1509                                 btrfsic_release_block_ctx(&next_block_ctx);
1510                                 return -1;
1511                         }
1512                         if (!block_was_created) {
1513                                 if ((state->print_mask &
1514                                      BTRFSIC_PRINT_MASK_VERBOSE) &&
1515                                     next_block->logical_bytenr != next_bytenr &&
1516                                     !(!next_block->is_metadata &&
1517                                       0 == next_block->logical_bytenr)) {
1518                                         printk(KERN_INFO
1519                                                "Referenced block @%llu (%s/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu).\n",
1520                                                next_bytenr,
1521                                                next_block_ctx.dev->name,
1522                                                next_block_ctx.dev_bytenr,
1523                                                mirror_num,
1524                                                next_block->logical_bytenr);
1525                                 }
1526                                 next_block->logical_bytenr = next_bytenr;
1527                                 next_block->mirror_num = mirror_num;
1528                         }
1529
1530                         l = btrfsic_block_link_lookup_or_add(state,
1531                                                              &next_block_ctx,
1532                                                              next_block, block,
1533                                                              generation);
1534                         btrfsic_release_block_ctx(&next_block_ctx);
1535                         if (NULL == l)
1536                                 return -1;
1537                 }
1538
1539                 next_bytenr += chunk_len;
1540                 num_bytes -= chunk_len;
1541         }
1542
1543         return 0;
1544 }
1545
1546 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1547                              struct btrfsic_block_data_ctx *block_ctx_out,
1548                              int mirror_num)
1549 {
1550         int ret;
1551         u64 length;
1552         struct btrfs_bio *multi = NULL;
1553         struct btrfs_device *device;
1554
1555         length = len;
1556         ret = btrfs_map_block(state->root->fs_info, READ,
1557                               bytenr, &length, &multi, mirror_num);
1558
1559         if (ret) {
1560                 block_ctx_out->start = 0;
1561                 block_ctx_out->dev_bytenr = 0;
1562                 block_ctx_out->len = 0;
1563                 block_ctx_out->dev = NULL;
1564                 block_ctx_out->datav = NULL;
1565                 block_ctx_out->pagev = NULL;
1566                 block_ctx_out->mem_to_free = NULL;
1567
1568                 return ret;
1569         }
1570
1571         device = multi->stripes[0].dev;
1572         block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1573         block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1574         block_ctx_out->start = bytenr;
1575         block_ctx_out->len = len;
1576         block_ctx_out->datav = NULL;
1577         block_ctx_out->pagev = NULL;
1578         block_ctx_out->mem_to_free = NULL;
1579
1580         kfree(multi);
1581         if (NULL == block_ctx_out->dev) {
1582                 ret = -ENXIO;
1583                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1584         }
1585
1586         return ret;
1587 }
1588
1589 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1590 {
1591         if (block_ctx->mem_to_free) {
1592                 unsigned int num_pages;
1593
1594                 BUG_ON(!block_ctx->datav);
1595                 BUG_ON(!block_ctx->pagev);
1596                 num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
1597                             PAGE_SHIFT;
1598                 while (num_pages > 0) {
1599                         num_pages--;
1600                         if (block_ctx->datav[num_pages]) {
1601                                 kunmap(block_ctx->pagev[num_pages]);
1602                                 block_ctx->datav[num_pages] = NULL;
1603                         }
1604                         if (block_ctx->pagev[num_pages]) {
1605                                 __free_page(block_ctx->pagev[num_pages]);
1606                                 block_ctx->pagev[num_pages] = NULL;
1607                         }
1608                 }
1609
1610                 kfree(block_ctx->mem_to_free);
1611                 block_ctx->mem_to_free = NULL;
1612                 block_ctx->pagev = NULL;
1613                 block_ctx->datav = NULL;
1614         }
1615 }
1616
1617 static int btrfsic_read_block(struct btrfsic_state *state,
1618                               struct btrfsic_block_data_ctx *block_ctx)
1619 {
1620         unsigned int num_pages;
1621         unsigned int i;
1622         u64 dev_bytenr;
1623         int ret;
1624
1625         BUG_ON(block_ctx->datav);
1626         BUG_ON(block_ctx->pagev);
1627         BUG_ON(block_ctx->mem_to_free);
1628         if (block_ctx->dev_bytenr & ((u64)PAGE_SIZE - 1)) {
1629                 printk(KERN_INFO
1630                        "btrfsic: read_block() with unaligned bytenr %llu\n",
1631                        block_ctx->dev_bytenr);
1632                 return -1;
1633         }
1634
1635         num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
1636                     PAGE_SHIFT;
1637         block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1638                                           sizeof(*block_ctx->pagev)) *
1639                                          num_pages, GFP_NOFS);
1640         if (!block_ctx->mem_to_free)
1641                 return -ENOMEM;
1642         block_ctx->datav = block_ctx->mem_to_free;
1643         block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1644         for (i = 0; i < num_pages; i++) {
1645                 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1646                 if (!block_ctx->pagev[i])
1647                         return -1;
1648         }
1649
1650         dev_bytenr = block_ctx->dev_bytenr;
1651         for (i = 0; i < num_pages;) {
1652                 struct bio *bio;
1653                 unsigned int j;
1654
1655                 bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
1656                 if (!bio) {
1657                         printk(KERN_INFO
1658                                "btrfsic: bio_alloc() for %u pages failed!\n",
1659                                num_pages - i);
1660                         return -1;
1661                 }
1662                 bio->bi_bdev = block_ctx->dev->bdev;
1663                 bio->bi_iter.bi_sector = dev_bytenr >> 9;
1664                 bio_set_op_attrs(bio, REQ_OP_READ, 0);
1665
1666                 for (j = i; j < num_pages; j++) {
1667                         ret = bio_add_page(bio, block_ctx->pagev[j],
1668                                            PAGE_SIZE, 0);
1669                         if (PAGE_SIZE != ret)
1670                                 break;
1671                 }
1672                 if (j == i) {
1673                         printk(KERN_INFO
1674                                "btrfsic: error, failed to add a single page!\n");
1675                         return -1;
1676                 }
1677                 if (submit_bio_wait(bio)) {
1678                         printk(KERN_INFO
1679                                "btrfsic: read error at logical %llu dev %s!\n",
1680                                block_ctx->start, block_ctx->dev->name);
1681                         bio_put(bio);
1682                         return -1;
1683                 }
1684                 bio_put(bio);
1685                 dev_bytenr += (j - i) * PAGE_SIZE;
1686                 i = j;
1687         }
1688         for (i = 0; i < num_pages; i++) {
1689                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1690                 if (!block_ctx->datav[i]) {
1691                         printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1692                                block_ctx->dev->name);
1693                         return -1;
1694                 }
1695         }
1696
1697         return block_ctx->len;
1698 }
1699
1700 static void btrfsic_dump_database(struct btrfsic_state *state)
1701 {
1702         const struct btrfsic_block *b_all;
1703
1704         BUG_ON(NULL == state);
1705
1706         printk(KERN_INFO "all_blocks_list:\n");
1707         list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) {
1708                 const struct btrfsic_block_link *l;
1709
1710                 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1711                        btrfsic_get_block_type(state, b_all),
1712                        b_all->logical_bytenr, b_all->dev_state->name,
1713                        b_all->dev_bytenr, b_all->mirror_num);
1714
1715                 list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) {
1716                         printk(KERN_INFO " %c @%llu (%s/%llu/%d) refers %u* to %c @%llu (%s/%llu/%d)\n",
1717                                btrfsic_get_block_type(state, b_all),
1718                                b_all->logical_bytenr, b_all->dev_state->name,
1719                                b_all->dev_bytenr, b_all->mirror_num,
1720                                l->ref_cnt,
1721                                btrfsic_get_block_type(state, l->block_ref_to),
1722                                l->block_ref_to->logical_bytenr,
1723                                l->block_ref_to->dev_state->name,
1724                                l->block_ref_to->dev_bytenr,
1725                                l->block_ref_to->mirror_num);
1726                 }
1727
1728                 list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) {
1729                         printk(KERN_INFO " %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
1730                                btrfsic_get_block_type(state, b_all),
1731                                b_all->logical_bytenr, b_all->dev_state->name,
1732                                b_all->dev_bytenr, b_all->mirror_num,
1733                                l->ref_cnt,
1734                                btrfsic_get_block_type(state, l->block_ref_from),
1735                                l->block_ref_from->logical_bytenr,
1736                                l->block_ref_from->dev_state->name,
1737                                l->block_ref_from->dev_bytenr,
1738                                l->block_ref_from->mirror_num);
1739                 }
1740
1741                 printk(KERN_INFO "\n");
1742         }
1743 }
1744
1745 /*
1746  * Test whether the disk block contains a tree block (leaf or node)
1747  * (note that this test fails for the super block)
1748  */
1749 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1750                                      char **datav, unsigned int num_pages)
1751 {
1752         struct btrfs_header *h;
1753         u8 csum[BTRFS_CSUM_SIZE];
1754         u32 crc = ~(u32)0;
1755         unsigned int i;
1756
1757         if (num_pages * PAGE_SIZE < state->metablock_size)
1758                 return 1; /* not metadata */
1759         num_pages = state->metablock_size >> PAGE_SHIFT;
1760         h = (struct btrfs_header *)datav[0];
1761
1762         if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1763                 return 1;
1764
1765         for (i = 0; i < num_pages; i++) {
1766                 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1767                 size_t sublen = i ? PAGE_SIZE :
1768                                     (PAGE_SIZE - BTRFS_CSUM_SIZE);
1769
1770                 crc = btrfs_crc32c(crc, data, sublen);
1771         }
1772         btrfs_csum_final(crc, csum);
1773         if (memcmp(csum, h->csum, state->csum_size))
1774                 return 1;
1775
1776         return 0; /* is metadata */
1777 }
1778
1779 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1780                                           u64 dev_bytenr, char **mapped_datav,
1781                                           unsigned int num_pages,
1782                                           struct bio *bio, int *bio_is_patched,
1783                                           struct buffer_head *bh,
1784                                           int submit_bio_bh_rw)
1785 {
1786         int is_metadata;
1787         struct btrfsic_block *block;
1788         struct btrfsic_block_data_ctx block_ctx;
1789         int ret;
1790         struct btrfsic_state *state = dev_state->state;
1791         struct block_device *bdev = dev_state->bdev;
1792         unsigned int processed_len;
1793
1794         if (NULL != bio_is_patched)
1795                 *bio_is_patched = 0;
1796
1797 again:
1798         if (num_pages == 0)
1799                 return;
1800
1801         processed_len = 0;
1802         is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1803                                                       num_pages));
1804
1805         block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1806                                                &state->block_hashtable);
1807         if (NULL != block) {
1808                 u64 bytenr = 0;
1809                 struct btrfsic_block_link *l, *tmp;
1810
1811                 if (block->is_superblock) {
1812                         bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
1813                                                     mapped_datav[0]);
1814                         if (num_pages * PAGE_SIZE <
1815                             BTRFS_SUPER_INFO_SIZE) {
1816                                 printk(KERN_INFO
1817                                        "btrfsic: cannot work with too short bios!\n");
1818                                 return;
1819                         }
1820                         is_metadata = 1;
1821                         BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_SIZE - 1));
1822                         processed_len = BTRFS_SUPER_INFO_SIZE;
1823                         if (state->print_mask &
1824                             BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1825                                 printk(KERN_INFO
1826                                        "[before new superblock is written]:\n");
1827                                 btrfsic_dump_tree_sub(state, block, 0);
1828                         }
1829                 }
1830                 if (is_metadata) {
1831                         if (!block->is_superblock) {
1832                                 if (num_pages * PAGE_SIZE <
1833                                     state->metablock_size) {
1834                                         printk(KERN_INFO
1835                                                "btrfsic: cannot work with too short bios!\n");
1836                                         return;
1837                                 }
1838                                 processed_len = state->metablock_size;
1839                                 bytenr = btrfs_stack_header_bytenr(
1840                                                 (struct btrfs_header *)
1841                                                 mapped_datav[0]);
1842                                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1843                                                                dev_state,
1844                                                                dev_bytenr);
1845                         }
1846                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) {
1847                                 if (block->logical_bytenr != bytenr &&
1848                                     !(!block->is_metadata &&
1849                                       block->logical_bytenr == 0))
1850                                         printk(KERN_INFO
1851                                                "Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n",
1852                                                bytenr, dev_state->name,
1853                                                dev_bytenr,
1854                                                block->mirror_num,
1855                                                btrfsic_get_block_type(state,
1856                                                                       block),
1857                                                block->logical_bytenr);
1858                                 else
1859                                         printk(KERN_INFO
1860                                                "Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1861                                                bytenr, dev_state->name,
1862                                                dev_bytenr, block->mirror_num,
1863                                                btrfsic_get_block_type(state,
1864                                                                       block));
1865                         }
1866                         block->logical_bytenr = bytenr;
1867                 } else {
1868                         if (num_pages * PAGE_SIZE <
1869                             state->datablock_size) {
1870                                 printk(KERN_INFO
1871                                        "btrfsic: cannot work with too short bios!\n");
1872                                 return;
1873                         }
1874                         processed_len = state->datablock_size;
1875                         bytenr = block->logical_bytenr;
1876                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1877                                 printk(KERN_INFO
1878                                        "Written block @%llu (%s/%llu/%d) found in hash table, %c.\n",
1879                                        bytenr, dev_state->name, dev_bytenr,
1880                                        block->mirror_num,
1881                                        btrfsic_get_block_type(state, block));
1882                 }
1883
1884                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1885                         printk(KERN_INFO
1886                                "ref_to_list: %cE, ref_from_list: %cE\n",
1887                                list_empty(&block->ref_to_list) ? ' ' : '!',
1888                                list_empty(&block->ref_from_list) ? ' ' : '!');
1889                 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1890                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n",
1891                                btrfsic_get_block_type(state, block), bytenr,
1892                                dev_state->name, dev_bytenr, block->mirror_num,
1893                                block->generation,
1894                                btrfs_disk_key_objectid(&block->disk_key),
1895                                block->disk_key.type,
1896                                btrfs_disk_key_offset(&block->disk_key),
1897                                btrfs_stack_header_generation(
1898                                        (struct btrfs_header *) mapped_datav[0]),
1899                                state->max_superblock_generation);
1900                         btrfsic_dump_tree(state);
1901                 }
1902
1903                 if (!block->is_iodone && !block->never_written) {
1904                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n",
1905                                btrfsic_get_block_type(state, block), bytenr,
1906                                dev_state->name, dev_bytenr, block->mirror_num,
1907                                block->generation,
1908                                btrfs_stack_header_generation(
1909                                        (struct btrfs_header *)
1910                                        mapped_datav[0]));
1911                         /* it would not be safe to go on */
1912                         btrfsic_dump_tree(state);
1913                         goto continue_loop;
1914                 }
1915
1916                 /*
1917                  * Clear all references of this block. Do not free
1918                  * the block itself even if is not referenced anymore
1919                  * because it still carries valuable information
1920                  * like whether it was ever written and IO completed.
1921                  */
1922                 list_for_each_entry_safe(l, tmp, &block->ref_to_list,
1923                                          node_ref_to) {
1924                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1925                                 btrfsic_print_rem_link(state, l);
1926                         l->ref_cnt--;
1927                         if (0 == l->ref_cnt) {
1928                                 list_del(&l->node_ref_to);
1929                                 list_del(&l->node_ref_from);
1930                                 btrfsic_block_link_hashtable_remove(l);
1931                                 btrfsic_block_link_free(l);
1932                         }
1933                 }
1934
1935                 block_ctx.dev = dev_state;
1936                 block_ctx.dev_bytenr = dev_bytenr;
1937                 block_ctx.start = bytenr;
1938                 block_ctx.len = processed_len;
1939                 block_ctx.pagev = NULL;
1940                 block_ctx.mem_to_free = NULL;
1941                 block_ctx.datav = mapped_datav;
1942
1943                 if (is_metadata || state->include_extent_data) {
1944                         block->never_written = 0;
1945                         block->iodone_w_error = 0;
1946                         if (NULL != bio) {
1947                                 block->is_iodone = 0;
1948                                 BUG_ON(NULL == bio_is_patched);
1949                                 if (!*bio_is_patched) {
1950                                         block->orig_bio_bh_private =
1951                                             bio->bi_private;
1952                                         block->orig_bio_bh_end_io.bio =
1953                                             bio->bi_end_io;
1954                                         block->next_in_same_bio = NULL;
1955                                         bio->bi_private = block;
1956                                         bio->bi_end_io = btrfsic_bio_end_io;
1957                                         *bio_is_patched = 1;
1958                                 } else {
1959                                         struct btrfsic_block *chained_block =
1960                                             (struct btrfsic_block *)
1961                                             bio->bi_private;
1962
1963                                         BUG_ON(NULL == chained_block);
1964                                         block->orig_bio_bh_private =
1965                                             chained_block->orig_bio_bh_private;
1966                                         block->orig_bio_bh_end_io.bio =
1967                                             chained_block->orig_bio_bh_end_io.
1968                                             bio;
1969                                         block->next_in_same_bio = chained_block;
1970                                         bio->bi_private = block;
1971                                 }
1972                         } else if (NULL != bh) {
1973                                 block->is_iodone = 0;
1974                                 block->orig_bio_bh_private = bh->b_private;
1975                                 block->orig_bio_bh_end_io.bh = bh->b_end_io;
1976                                 block->next_in_same_bio = NULL;
1977                                 bh->b_private = block;
1978                                 bh->b_end_io = btrfsic_bh_end_io;
1979                         } else {
1980                                 block->is_iodone = 1;
1981                                 block->orig_bio_bh_private = NULL;
1982                                 block->orig_bio_bh_end_io.bio = NULL;
1983                                 block->next_in_same_bio = NULL;
1984                         }
1985                 }
1986
1987                 block->flush_gen = dev_state->last_flush_gen + 1;
1988                 block->submit_bio_bh_rw = submit_bio_bh_rw;
1989                 if (is_metadata) {
1990                         block->logical_bytenr = bytenr;
1991                         block->is_metadata = 1;
1992                         if (block->is_superblock) {
1993                                 BUG_ON(PAGE_SIZE !=
1994                                        BTRFS_SUPER_INFO_SIZE);
1995                                 ret = btrfsic_process_written_superblock(
1996                                                 state,
1997                                                 block,
1998                                                 (struct btrfs_super_block *)
1999                                                 mapped_datav[0]);
2000                                 if (state->print_mask &
2001                                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2002                                         printk(KERN_INFO
2003                                         "[after new superblock is written]:\n");
2004                                         btrfsic_dump_tree_sub(state, block, 0);
2005                                 }
2006                         } else {
2007                                 block->mirror_num = 0;  /* unknown */
2008                                 ret = btrfsic_process_metablock(
2009                                                 state,
2010                                                 block,
2011                                                 &block_ctx,
2012                                                 0, 0);
2013                         }
2014                         if (ret)
2015                                 printk(KERN_INFO
2016                                        "btrfsic: btrfsic_process_metablock(root @%llu) failed!\n",
2017                                        dev_bytenr);
2018                 } else {
2019                         block->is_metadata = 0;
2020                         block->mirror_num = 0;  /* unknown */
2021                         block->generation = BTRFSIC_GENERATION_UNKNOWN;
2022                         if (!state->include_extent_data
2023                             && list_empty(&block->ref_from_list)) {
2024                                 /*
2025                                  * disk block is overwritten with extent
2026                                  * data (not meta data) and we are configured
2027                                  * to not include extent data: take the
2028                                  * chance and free the block's memory
2029                                  */
2030                                 btrfsic_block_hashtable_remove(block);
2031                                 list_del(&block->all_blocks_node);
2032                                 btrfsic_block_free(block);
2033                         }
2034                 }
2035                 btrfsic_release_block_ctx(&block_ctx);
2036         } else {
2037                 /* block has not been found in hash table */
2038                 u64 bytenr;
2039
2040                 if (!is_metadata) {
2041                         processed_len = state->datablock_size;
2042                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2043                                 printk(KERN_INFO "Written block (%s/%llu/?) !found in hash table, D.\n",
2044                                        dev_state->name, dev_bytenr);
2045                         if (!state->include_extent_data) {
2046                                 /* ignore that written D block */
2047                                 goto continue_loop;
2048                         }
2049
2050                         /* this is getting ugly for the
2051                          * include_extent_data case... */
2052                         bytenr = 0;     /* unknown */
2053                 } else {
2054                         processed_len = state->metablock_size;
2055                         bytenr = btrfs_stack_header_bytenr(
2056                                         (struct btrfs_header *)
2057                                         mapped_datav[0]);
2058                         btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2059                                                        dev_bytenr);
2060                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2061                                 printk(KERN_INFO
2062                                        "Written block @%llu (%s/%llu/?) !found in hash table, M.\n",
2063                                        bytenr, dev_state->name, dev_bytenr);
2064                 }
2065
2066                 block_ctx.dev = dev_state;
2067                 block_ctx.dev_bytenr = dev_bytenr;
2068                 block_ctx.start = bytenr;
2069                 block_ctx.len = processed_len;
2070                 block_ctx.pagev = NULL;
2071                 block_ctx.mem_to_free = NULL;
2072                 block_ctx.datav = mapped_datav;
2073
2074                 block = btrfsic_block_alloc();
2075                 if (NULL == block) {
2076                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2077                         btrfsic_release_block_ctx(&block_ctx);
2078                         goto continue_loop;
2079                 }
2080                 block->dev_state = dev_state;
2081                 block->dev_bytenr = dev_bytenr;
2082                 block->logical_bytenr = bytenr;
2083                 block->is_metadata = is_metadata;
2084                 block->never_written = 0;
2085                 block->iodone_w_error = 0;
2086                 block->mirror_num = 0;  /* unknown */
2087                 block->flush_gen = dev_state->last_flush_gen + 1;
2088                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2089                 if (NULL != bio) {
2090                         block->is_iodone = 0;
2091                         BUG_ON(NULL == bio_is_patched);
2092                         if (!*bio_is_patched) {
2093                                 block->orig_bio_bh_private = bio->bi_private;
2094                                 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2095                                 block->next_in_same_bio = NULL;
2096                                 bio->bi_private = block;
2097                                 bio->bi_end_io = btrfsic_bio_end_io;
2098                                 *bio_is_patched = 1;
2099                         } else {
2100                                 struct btrfsic_block *chained_block =
2101                                     (struct btrfsic_block *)
2102                                     bio->bi_private;
2103
2104                                 BUG_ON(NULL == chained_block);
2105                                 block->orig_bio_bh_private =
2106                                     chained_block->orig_bio_bh_private;
2107                                 block->orig_bio_bh_end_io.bio =
2108                                     chained_block->orig_bio_bh_end_io.bio;
2109                                 block->next_in_same_bio = chained_block;
2110                                 bio->bi_private = block;
2111                         }
2112                 } else if (NULL != bh) {
2113                         block->is_iodone = 0;
2114                         block->orig_bio_bh_private = bh->b_private;
2115                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2116                         block->next_in_same_bio = NULL;
2117                         bh->b_private = block;
2118                         bh->b_end_io = btrfsic_bh_end_io;
2119                 } else {
2120                         block->is_iodone = 1;
2121                         block->orig_bio_bh_private = NULL;
2122                         block->orig_bio_bh_end_io.bio = NULL;
2123                         block->next_in_same_bio = NULL;
2124                 }
2125                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2126                         printk(KERN_INFO
2127                                "New written %c-block @%llu (%s/%llu/%d)\n",
2128                                is_metadata ? 'M' : 'D',
2129                                block->logical_bytenr, block->dev_state->name,
2130                                block->dev_bytenr, block->mirror_num);
2131                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2132                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2133
2134                 if (is_metadata) {
2135                         ret = btrfsic_process_metablock(state, block,
2136                                                         &block_ctx, 0, 0);
2137                         if (ret)
2138                                 printk(KERN_INFO
2139                                        "btrfsic: process_metablock(root @%llu) failed!\n",
2140                                        dev_bytenr);
2141                 }
2142                 btrfsic_release_block_ctx(&block_ctx);
2143         }
2144
2145 continue_loop:
2146         BUG_ON(!processed_len);
2147         dev_bytenr += processed_len;
2148         mapped_datav += processed_len >> PAGE_SHIFT;
2149         num_pages -= processed_len >> PAGE_SHIFT;
2150         goto again;
2151 }
2152
2153 static void btrfsic_bio_end_io(struct bio *bp)
2154 {
2155         struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2156         int iodone_w_error;
2157
2158         /* mutex is not held! This is not save if IO is not yet completed
2159          * on umount */
2160         iodone_w_error = 0;
2161         if (bp->bi_error)
2162                 iodone_w_error = 1;
2163
2164         BUG_ON(NULL == block);
2165         bp->bi_private = block->orig_bio_bh_private;
2166         bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2167
2168         do {
2169                 struct btrfsic_block *next_block;
2170                 struct btrfsic_dev_state *const dev_state = block->dev_state;
2171
2172                 if ((dev_state->state->print_mask &
2173                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2174                         printk(KERN_INFO
2175                                "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2176                                bp->bi_error,
2177                                btrfsic_get_block_type(dev_state->state, block),
2178                                block->logical_bytenr, dev_state->name,
2179                                block->dev_bytenr, block->mirror_num);
2180                 next_block = block->next_in_same_bio;
2181                 block->iodone_w_error = iodone_w_error;
2182                 if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
2183                         dev_state->last_flush_gen++;
2184                         if ((dev_state->state->print_mask &
2185                              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2186                                 printk(KERN_INFO
2187                                        "bio_end_io() new %s flush_gen=%llu\n",
2188                                        dev_state->name,
2189                                        dev_state->last_flush_gen);
2190                 }
2191                 if (block->submit_bio_bh_rw & REQ_FUA)
2192                         block->flush_gen = 0; /* FUA completed means block is
2193                                                * on disk */
2194                 block->is_iodone = 1; /* for FLUSH, this releases the block */
2195                 block = next_block;
2196         } while (NULL != block);
2197
2198         bp->bi_end_io(bp);
2199 }
2200
2201 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2202 {
2203         struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2204         int iodone_w_error = !uptodate;
2205         struct btrfsic_dev_state *dev_state;
2206
2207         BUG_ON(NULL == block);
2208         dev_state = block->dev_state;
2209         if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2210                 printk(KERN_INFO
2211                        "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2212                        iodone_w_error,
2213                        btrfsic_get_block_type(dev_state->state, block),
2214                        block->logical_bytenr, block->dev_state->name,
2215                        block->dev_bytenr, block->mirror_num);
2216
2217         block->iodone_w_error = iodone_w_error;
2218         if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
2219                 dev_state->last_flush_gen++;
2220                 if ((dev_state->state->print_mask &
2221                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2222                         printk(KERN_INFO
2223                                "bh_end_io() new %s flush_gen=%llu\n",
2224                                dev_state->name, dev_state->last_flush_gen);
2225         }
2226         if (block->submit_bio_bh_rw & REQ_FUA)
2227                 block->flush_gen = 0; /* FUA completed means block is on disk */
2228
2229         bh->b_private = block->orig_bio_bh_private;
2230         bh->b_end_io = block->orig_bio_bh_end_io.bh;
2231         block->is_iodone = 1; /* for FLUSH, this releases the block */
2232         bh->b_end_io(bh, uptodate);
2233 }
2234
2235 static int btrfsic_process_written_superblock(
2236                 struct btrfsic_state *state,
2237                 struct btrfsic_block *const superblock,
2238                 struct btrfs_super_block *const super_hdr)
2239 {
2240         int pass;
2241
2242         superblock->generation = btrfs_super_generation(super_hdr);
2243         if (!(superblock->generation > state->max_superblock_generation ||
2244               0 == state->max_superblock_generation)) {
2245                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2246                         printk(KERN_INFO
2247                                "btrfsic: superblock @%llu (%s/%llu/%d) with old gen %llu <= %llu\n",
2248                                superblock->logical_bytenr,
2249                                superblock->dev_state->name,
2250                                superblock->dev_bytenr, superblock->mirror_num,
2251                                btrfs_super_generation(super_hdr),
2252                                state->max_superblock_generation);
2253         } else {
2254                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2255                         printk(KERN_INFO
2256                                "btrfsic: got new superblock @%llu (%s/%llu/%d) with new gen %llu > %llu\n",
2257                                superblock->logical_bytenr,
2258                                superblock->dev_state->name,
2259                                superblock->dev_bytenr, superblock->mirror_num,
2260                                btrfs_super_generation(super_hdr),
2261                                state->max_superblock_generation);
2262
2263                 state->max_superblock_generation =
2264                     btrfs_super_generation(super_hdr);
2265                 state->latest_superblock = superblock;
2266         }
2267
2268         for (pass = 0; pass < 3; pass++) {
2269                 int ret;
2270                 u64 next_bytenr;
2271                 struct btrfsic_block *next_block;
2272                 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2273                 struct btrfsic_block_link *l;
2274                 int num_copies;
2275                 int mirror_num;
2276                 const char *additional_string = NULL;
2277                 struct btrfs_disk_key tmp_disk_key = {0};
2278
2279                 btrfs_set_disk_key_objectid(&tmp_disk_key,
2280                                             BTRFS_ROOT_ITEM_KEY);
2281                 btrfs_set_disk_key_objectid(&tmp_disk_key, 0);
2282
2283                 switch (pass) {
2284                 case 0:
2285                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2286                                                     BTRFS_ROOT_TREE_OBJECTID);
2287                         additional_string = "root ";
2288                         next_bytenr = btrfs_super_root(super_hdr);
2289                         if (state->print_mask &
2290                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2291                                 printk(KERN_INFO "root@%llu\n", next_bytenr);
2292                         break;
2293                 case 1:
2294                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2295                                                     BTRFS_CHUNK_TREE_OBJECTID);
2296                         additional_string = "chunk ";
2297                         next_bytenr = btrfs_super_chunk_root(super_hdr);
2298                         if (state->print_mask &
2299                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2300                                 printk(KERN_INFO "chunk@%llu\n", next_bytenr);
2301                         break;
2302                 case 2:
2303                         btrfs_set_disk_key_objectid(&tmp_disk_key,
2304                                                     BTRFS_TREE_LOG_OBJECTID);
2305                         additional_string = "log ";
2306                         next_bytenr = btrfs_super_log_root(super_hdr);
2307                         if (0 == next_bytenr)
2308                                 continue;
2309                         if (state->print_mask &
2310                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2311                                 printk(KERN_INFO "log@%llu\n", next_bytenr);
2312                         break;
2313                 }
2314
2315                 num_copies =
2316                     btrfs_num_copies(state->root->fs_info,
2317                                      next_bytenr, BTRFS_SUPER_INFO_SIZE);
2318                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2319                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2320                                next_bytenr, num_copies);
2321                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2322                         int was_created;
2323
2324                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2325                                 printk(KERN_INFO
2326                                        "btrfsic_process_written_superblock(mirror_num=%d)\n", mirror_num);
2327                         ret = btrfsic_map_block(state, next_bytenr,
2328                                                 BTRFS_SUPER_INFO_SIZE,
2329                                                 &tmp_next_block_ctx,
2330                                                 mirror_num);
2331                         if (ret) {
2332                                 printk(KERN_INFO
2333                                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
2334                                        next_bytenr, mirror_num);
2335                                 return -1;
2336                         }
2337
2338                         next_block = btrfsic_block_lookup_or_add(
2339                                         state,
2340                                         &tmp_next_block_ctx,
2341                                         additional_string,
2342                                         1, 0, 1,
2343                                         mirror_num,
2344                                         &was_created);
2345                         if (NULL == next_block) {
2346                                 printk(KERN_INFO
2347                                        "btrfsic: error, kmalloc failed!\n");
2348                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2349                                 return -1;
2350                         }
2351
2352                         next_block->disk_key = tmp_disk_key;
2353                         if (was_created)
2354                                 next_block->generation =
2355                                     BTRFSIC_GENERATION_UNKNOWN;
2356                         l = btrfsic_block_link_lookup_or_add(
2357                                         state,
2358                                         &tmp_next_block_ctx,
2359                                         next_block,
2360                                         superblock,
2361                                         BTRFSIC_GENERATION_UNKNOWN);
2362                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
2363                         if (NULL == l)
2364                                 return -1;
2365                 }
2366         }
2367
2368         if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)))
2369                 btrfsic_dump_tree(state);
2370
2371         return 0;
2372 }
2373
2374 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2375                                         struct btrfsic_block *const block,
2376                                         int recursion_level)
2377 {
2378         const struct btrfsic_block_link *l;
2379         int ret = 0;
2380
2381         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2382                 /*
2383                  * Note that this situation can happen and does not
2384                  * indicate an error in regular cases. It happens
2385                  * when disk blocks are freed and later reused.
2386                  * The check-integrity module is not aware of any
2387                  * block free operations, it just recognizes block
2388                  * write operations. Therefore it keeps the linkage
2389                  * information for a block until a block is
2390                  * rewritten. This can temporarily cause incorrect
2391                  * and even circular linkage informations. This
2392                  * causes no harm unless such blocks are referenced
2393                  * by the most recent super block.
2394                  */
2395                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2396                         printk(KERN_INFO
2397                                "btrfsic: abort cyclic linkage (case 1).\n");
2398
2399                 return ret;
2400         }
2401
2402         /*
2403          * This algorithm is recursive because the amount of used stack
2404          * space is very small and the max recursion depth is limited.
2405          */
2406         list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
2407                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2408                         printk(KERN_INFO
2409                                "rl=%d, %c @%llu (%s/%llu/%d) %u* refers to %c @%llu (%s/%llu/%d)\n",
2410                                recursion_level,
2411                                btrfsic_get_block_type(state, block),
2412                                block->logical_bytenr, block->dev_state->name,
2413                                block->dev_bytenr, block->mirror_num,
2414                                l->ref_cnt,
2415                                btrfsic_get_block_type(state, l->block_ref_to),
2416                                l->block_ref_to->logical_bytenr,
2417                                l->block_ref_to->dev_state->name,
2418                                l->block_ref_to->dev_bytenr,
2419                                l->block_ref_to->mirror_num);
2420                 if (l->block_ref_to->never_written) {
2421                         printk(KERN_INFO "btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is never written!\n",
2422                                btrfsic_get_block_type(state, l->block_ref_to),
2423                                l->block_ref_to->logical_bytenr,
2424                                l->block_ref_to->dev_state->name,
2425                                l->block_ref_to->dev_bytenr,
2426                                l->block_ref_to->mirror_num);
2427                         ret = -1;
2428                 } else if (!l->block_ref_to->is_iodone) {
2429                         printk(KERN_INFO "btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not yet iodone!\n",
2430                                btrfsic_get_block_type(state, l->block_ref_to),
2431                                l->block_ref_to->logical_bytenr,
2432                                l->block_ref_to->dev_state->name,
2433                                l->block_ref_to->dev_bytenr,
2434                                l->block_ref_to->mirror_num);
2435                         ret = -1;
2436                 } else if (l->block_ref_to->iodone_w_error) {
2437                         printk(KERN_INFO "btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which has write error!\n",
2438                                btrfsic_get_block_type(state, l->block_ref_to),
2439                                l->block_ref_to->logical_bytenr,
2440                                l->block_ref_to->dev_state->name,
2441                                l->block_ref_to->dev_bytenr,
2442                                l->block_ref_to->mirror_num);
2443                         ret = -1;
2444                 } else if (l->parent_generation !=
2445                            l->block_ref_to->generation &&
2446                            BTRFSIC_GENERATION_UNKNOWN !=
2447                            l->parent_generation &&
2448                            BTRFSIC_GENERATION_UNKNOWN !=
2449                            l->block_ref_to->generation) {
2450                         printk(KERN_INFO "btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) with generation %llu != parent generation %llu!\n",
2451                                btrfsic_get_block_type(state, l->block_ref_to),
2452                                l->block_ref_to->logical_bytenr,
2453                                l->block_ref_to->dev_state->name,
2454                                l->block_ref_to->dev_bytenr,
2455                                l->block_ref_to->mirror_num,
2456                                l->block_ref_to->generation,
2457                                l->parent_generation);
2458                         ret = -1;
2459                 } else if (l->block_ref_to->flush_gen >
2460                            l->block_ref_to->dev_state->last_flush_gen) {
2461                         printk(KERN_INFO "btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n",
2462                                btrfsic_get_block_type(state, l->block_ref_to),
2463                                l->block_ref_to->logical_bytenr,
2464                                l->block_ref_to->dev_state->name,
2465                                l->block_ref_to->dev_bytenr,
2466                                l->block_ref_to->mirror_num, block->flush_gen,
2467                                l->block_ref_to->dev_state->last_flush_gen);
2468                         ret = -1;
2469                 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2470                                                               l->block_ref_to,
2471                                                               recursion_level +
2472                                                               1)) {
2473                         ret = -1;
2474                 }
2475         }
2476
2477         return ret;
2478 }
2479
2480 static int btrfsic_is_block_ref_by_superblock(
2481                 const struct btrfsic_state *state,
2482                 const struct btrfsic_block *block,
2483                 int recursion_level)
2484 {
2485         const struct btrfsic_block_link *l;
2486
2487         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2488                 /* refer to comment at "abort cyclic linkage (case 1)" */
2489                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2490                         printk(KERN_INFO
2491                                "btrfsic: abort cyclic linkage (case 2).\n");
2492
2493                 return 0;
2494         }
2495
2496         /*
2497          * This algorithm is recursive because the amount of used stack space
2498          * is very small and the max recursion depth is limited.
2499          */
2500         list_for_each_entry(l, &block->ref_from_list, node_ref_from) {
2501                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2502                         printk(KERN_INFO
2503                                "rl=%d, %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n",
2504                                recursion_level,
2505                                btrfsic_get_block_type(state, block),
2506                                block->logical_bytenr, block->dev_state->name,
2507                                block->dev_bytenr, block->mirror_num,
2508                                l->ref_cnt,
2509                                btrfsic_get_block_type(state, l->block_ref_from),
2510                                l->block_ref_from->logical_bytenr,
2511                                l->block_ref_from->dev_state->name,
2512                                l->block_ref_from->dev_bytenr,
2513                                l->block_ref_from->mirror_num);
2514                 if (l->block_ref_from->is_superblock &&
2515                     state->latest_superblock->dev_bytenr ==
2516                     l->block_ref_from->dev_bytenr &&
2517                     state->latest_superblock->dev_state->bdev ==
2518                     l->block_ref_from->dev_state->bdev)
2519                         return 1;
2520                 else if (btrfsic_is_block_ref_by_superblock(state,
2521                                                             l->block_ref_from,
2522                                                             recursion_level +
2523                                                             1))
2524                         return 1;
2525         }
2526
2527         return 0;
2528 }
2529
2530 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2531                                    const struct btrfsic_block_link *l)
2532 {
2533         printk(KERN_INFO
2534                "Add %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
2535                l->ref_cnt,
2536                btrfsic_get_block_type(state, l->block_ref_from),
2537                l->block_ref_from->logical_bytenr,
2538                l->block_ref_from->dev_state->name,
2539                l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2540                btrfsic_get_block_type(state, l->block_ref_to),
2541                l->block_ref_to->logical_bytenr,
2542                l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2543                l->block_ref_to->mirror_num);
2544 }
2545
2546 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2547                                    const struct btrfsic_block_link *l)
2548 {
2549         printk(KERN_INFO
2550                "Rem %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n",
2551                l->ref_cnt,
2552                btrfsic_get_block_type(state, l->block_ref_from),
2553                l->block_ref_from->logical_bytenr,
2554                l->block_ref_from->dev_state->name,
2555                l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
2556                btrfsic_get_block_type(state, l->block_ref_to),
2557                l->block_ref_to->logical_bytenr,
2558                l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
2559                l->block_ref_to->mirror_num);
2560 }
2561
2562 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2563                                    const struct btrfsic_block *block)
2564 {
2565         if (block->is_superblock &&
2566             state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2567             state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2568                 return 'S';
2569         else if (block->is_superblock)
2570                 return 's';
2571         else if (block->is_metadata)
2572                 return 'M';
2573         else
2574                 return 'D';
2575 }
2576
2577 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2578 {
2579         btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2580 }
2581
2582 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2583                                   const struct btrfsic_block *block,
2584                                   int indent_level)
2585 {
2586         const struct btrfsic_block_link *l;
2587         int indent_add;
2588         static char buf[80];
2589         int cursor_position;
2590
2591         /*
2592          * Should better fill an on-stack buffer with a complete line and
2593          * dump it at once when it is time to print a newline character.
2594          */
2595
2596         /*
2597          * This algorithm is recursive because the amount of used stack space
2598          * is very small and the max recursion depth is limited.
2599          */
2600         indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)",
2601                              btrfsic_get_block_type(state, block),
2602                              block->logical_bytenr, block->dev_state->name,
2603                              block->dev_bytenr, block->mirror_num);
2604         if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2605                 printk("[...]\n");
2606                 return;
2607         }
2608         printk(buf);
2609         indent_level += indent_add;
2610         if (list_empty(&block->ref_to_list)) {
2611                 printk("\n");
2612                 return;
2613         }
2614         if (block->mirror_num > 1 &&
2615             !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2616                 printk(" [...]\n");
2617                 return;
2618         }
2619
2620         cursor_position = indent_level;
2621         list_for_each_entry(l, &block->ref_to_list, node_ref_to) {
2622                 while (cursor_position < indent_level) {
2623                         printk(" ");
2624                         cursor_position++;
2625                 }
2626                 if (l->ref_cnt > 1)
2627                         indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2628                 else
2629                         indent_add = sprintf(buf, " --> ");
2630                 if (indent_level + indent_add >
2631                     BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2632                         printk("[...]\n");
2633                         cursor_position = 0;
2634                         continue;
2635                 }
2636
2637                 printk(buf);
2638
2639                 btrfsic_dump_tree_sub(state, l->block_ref_to,
2640                                       indent_level + indent_add);
2641                 cursor_position = 0;
2642         }
2643 }
2644
2645 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2646                 struct btrfsic_state *state,
2647                 struct btrfsic_block_data_ctx *next_block_ctx,
2648                 struct btrfsic_block *next_block,
2649                 struct btrfsic_block *from_block,
2650                 u64 parent_generation)
2651 {
2652         struct btrfsic_block_link *l;
2653
2654         l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2655                                                 next_block_ctx->dev_bytenr,
2656                                                 from_block->dev_state->bdev,
2657                                                 from_block->dev_bytenr,
2658                                                 &state->block_link_hashtable);
2659         if (NULL == l) {
2660                 l = btrfsic_block_link_alloc();
2661                 if (NULL == l) {
2662                         printk(KERN_INFO
2663                                "btrfsic: error, kmalloc failed!\n");
2664                         return NULL;
2665                 }
2666
2667                 l->block_ref_to = next_block;
2668                 l->block_ref_from = from_block;
2669                 l->ref_cnt = 1;
2670                 l->parent_generation = parent_generation;
2671
2672                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2673                         btrfsic_print_add_link(state, l);
2674
2675                 list_add(&l->node_ref_to, &from_block->ref_to_list);
2676                 list_add(&l->node_ref_from, &next_block->ref_from_list);
2677
2678                 btrfsic_block_link_hashtable_add(l,
2679                                                  &state->block_link_hashtable);
2680         } else {
2681                 l->ref_cnt++;
2682                 l->parent_generation = parent_generation;
2683                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2684                         btrfsic_print_add_link(state, l);
2685         }
2686
2687         return l;
2688 }
2689
2690 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2691                 struct btrfsic_state *state,
2692                 struct btrfsic_block_data_ctx *block_ctx,
2693                 const char *additional_string,
2694                 int is_metadata,
2695                 int is_iodone,
2696                 int never_written,
2697                 int mirror_num,
2698                 int *was_created)
2699 {
2700         struct btrfsic_block *block;
2701
2702         block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2703                                                block_ctx->dev_bytenr,
2704                                                &state->block_hashtable);
2705         if (NULL == block) {
2706                 struct btrfsic_dev_state *dev_state;
2707
2708                 block = btrfsic_block_alloc();
2709                 if (NULL == block) {
2710                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2711                         return NULL;
2712                 }
2713                 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2714                 if (NULL == dev_state) {
2715                         printk(KERN_INFO
2716                                "btrfsic: error, lookup dev_state failed!\n");
2717                         btrfsic_block_free(block);
2718                         return NULL;
2719                 }
2720                 block->dev_state = dev_state;
2721                 block->dev_bytenr = block_ctx->dev_bytenr;
2722                 block->logical_bytenr = block_ctx->start;
2723                 block->is_metadata = is_metadata;
2724                 block->is_iodone = is_iodone;
2725                 block->never_written = never_written;
2726                 block->mirror_num = mirror_num;
2727                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2728                         printk(KERN_INFO
2729                                "New %s%c-block @%llu (%s/%llu/%d)\n",
2730                                additional_string,
2731                                btrfsic_get_block_type(state, block),
2732                                block->logical_bytenr, dev_state->name,
2733                                block->dev_bytenr, mirror_num);
2734                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2735                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2736                 if (NULL != was_created)
2737                         *was_created = 1;
2738         } else {
2739                 if (NULL != was_created)
2740                         *was_created = 0;
2741         }
2742
2743         return block;
2744 }
2745
2746 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2747                                            u64 bytenr,
2748                                            struct btrfsic_dev_state *dev_state,
2749                                            u64 dev_bytenr)
2750 {
2751         int num_copies;
2752         int mirror_num;
2753         int ret;
2754         struct btrfsic_block_data_ctx block_ctx;
2755         int match = 0;
2756
2757         num_copies = btrfs_num_copies(state->root->fs_info,
2758                                       bytenr, state->metablock_size);
2759
2760         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2761                 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2762                                         &block_ctx, mirror_num);
2763                 if (ret) {
2764                         printk(KERN_INFO "btrfsic: btrfsic_map_block(logical @%llu, mirror %d) failed!\n",
2765                                bytenr, mirror_num);
2766                         continue;
2767                 }
2768
2769                 if (dev_state->bdev == block_ctx.dev->bdev &&
2770                     dev_bytenr == block_ctx.dev_bytenr) {
2771                         match++;
2772                         btrfsic_release_block_ctx(&block_ctx);
2773                         break;
2774                 }
2775                 btrfsic_release_block_ctx(&block_ctx);
2776         }
2777
2778         if (WARN_ON(!match)) {
2779                 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%s, phys_bytenr=%llu)!\n",
2780                        bytenr, dev_state->name, dev_bytenr);
2781                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2782                         ret = btrfsic_map_block(state, bytenr,
2783                                                 state->metablock_size,
2784                                                 &block_ctx, mirror_num);
2785                         if (ret)
2786                                 continue;
2787
2788                         printk(KERN_INFO "Read logical bytenr @%llu maps to (%s/%llu/%d)\n",
2789                                bytenr, block_ctx.dev->name,
2790                                block_ctx.dev_bytenr, mirror_num);
2791                 }
2792         }
2793 }
2794
2795 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2796                 struct block_device *bdev)
2797 {
2798         return btrfsic_dev_state_hashtable_lookup(bdev,
2799                                                   &btrfsic_dev_state_hashtable);
2800 }
2801
2802 int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh)
2803 {
2804         struct btrfsic_dev_state *dev_state;
2805
2806         if (!btrfsic_is_initialized)
2807                 return submit_bh(op, op_flags, bh);
2808
2809         mutex_lock(&btrfsic_mutex);
2810         /* since btrfsic_submit_bh() might also be called before
2811          * btrfsic_mount(), this might return NULL */
2812         dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
2813
2814         /* Only called to write the superblock (incl. FLUSH/FUA) */
2815         if (NULL != dev_state &&
2816             (op == REQ_OP_WRITE) && bh->b_size > 0) {
2817                 u64 dev_bytenr;
2818
2819                 dev_bytenr = 4096 * bh->b_blocknr;
2820                 if (dev_state->state->print_mask &
2821                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2822                         printk(KERN_INFO
2823                                "submit_bh(op=0x%x,0x%x, blocknr=%llu (bytenr %llu), size=%zu, data=%p, bdev=%p)\n",
2824                                op, op_flags, (unsigned long long)bh->b_blocknr,
2825                                dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
2826                 btrfsic_process_written_block(dev_state, dev_bytenr,
2827                                               &bh->b_data, 1, NULL,
2828                                               NULL, bh, op_flags);
2829         } else if (NULL != dev_state && (op_flags & REQ_PREFLUSH)) {
2830                 if (dev_state->state->print_mask &
2831                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2832                         printk(KERN_INFO
2833                                "submit_bh(op=0x%x,0x%x FLUSH, bdev=%p)\n",
2834                                op, op_flags, bh->b_bdev);
2835                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2836                         if ((dev_state->state->print_mask &
2837                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2838                               BTRFSIC_PRINT_MASK_VERBOSE)))
2839                                 printk(KERN_INFO
2840                                        "btrfsic_submit_bh(%s) with FLUSH but dummy block already in use (ignored)!\n",
2841                                        dev_state->name);
2842                 } else {
2843                         struct btrfsic_block *const block =
2844                                 &dev_state->dummy_block_for_bio_bh_flush;
2845
2846                         block->is_iodone = 0;
2847                         block->never_written = 0;
2848                         block->iodone_w_error = 0;
2849                         block->flush_gen = dev_state->last_flush_gen + 1;
2850                         block->submit_bio_bh_rw = op_flags;
2851                         block->orig_bio_bh_private = bh->b_private;
2852                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2853                         block->next_in_same_bio = NULL;
2854                         bh->b_private = block;
2855                         bh->b_end_io = btrfsic_bh_end_io;
2856                 }
2857         }
2858         mutex_unlock(&btrfsic_mutex);
2859         return submit_bh(op, op_flags, bh);
2860 }
2861
2862 static void __btrfsic_submit_bio(struct bio *bio)
2863 {
2864         struct btrfsic_dev_state *dev_state;
2865
2866         if (!btrfsic_is_initialized)
2867                 return;
2868
2869         mutex_lock(&btrfsic_mutex);
2870         /* since btrfsic_submit_bio() is also called before
2871          * btrfsic_mount(), this might return NULL */
2872         dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
2873         if (NULL != dev_state &&
2874             (bio_op(bio) == REQ_OP_WRITE) && NULL != bio->bi_io_vec) {
2875                 unsigned int i;
2876                 u64 dev_bytenr;
2877                 u64 cur_bytenr;
2878                 int bio_is_patched;
2879                 char **mapped_datav;
2880
2881                 dev_bytenr = 512 * bio->bi_iter.bi_sector;
2882                 bio_is_patched = 0;
2883                 if (dev_state->state->print_mask &
2884                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2885                         printk(KERN_INFO
2886                                "submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
2887                                bio_op(bio), bio->bi_opf, bio->bi_vcnt,
2888                                (unsigned long long)bio->bi_iter.bi_sector,
2889                                dev_bytenr, bio->bi_bdev);
2890
2891                 mapped_datav = kmalloc_array(bio->bi_vcnt,
2892                                              sizeof(*mapped_datav), GFP_NOFS);
2893                 if (!mapped_datav)
2894                         goto leave;
2895                 cur_bytenr = dev_bytenr;
2896                 for (i = 0; i < bio->bi_vcnt; i++) {
2897                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_SIZE);
2898                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
2899                         if (!mapped_datav[i]) {
2900                                 while (i > 0) {
2901                                         i--;
2902                                         kunmap(bio->bi_io_vec[i].bv_page);
2903                                 }
2904                                 kfree(mapped_datav);
2905                                 goto leave;
2906                         }
2907                         if (dev_state->state->print_mask &
2908                             BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE)
2909                                 printk(KERN_INFO
2910                                        "#%u: bytenr=%llu, len=%u, offset=%u\n",
2911                                        i, cur_bytenr, bio->bi_io_vec[i].bv_len,
2912                                        bio->bi_io_vec[i].bv_offset);
2913                         cur_bytenr += bio->bi_io_vec[i].bv_len;
2914                 }
2915                 btrfsic_process_written_block(dev_state, dev_bytenr,
2916                                               mapped_datav, bio->bi_vcnt,
2917                                               bio, &bio_is_patched,
2918                                               NULL, bio->bi_opf);
2919                 while (i > 0) {
2920                         i--;
2921                         kunmap(bio->bi_io_vec[i].bv_page);
2922                 }
2923                 kfree(mapped_datav);
2924         } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) {
2925                 if (dev_state->state->print_mask &
2926                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
2927                         printk(KERN_INFO
2928                                "submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n",
2929                                bio_op(bio), bio->bi_opf, bio->bi_bdev);
2930                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
2931                         if ((dev_state->state->print_mask &
2932                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
2933                               BTRFSIC_PRINT_MASK_VERBOSE)))
2934                                 printk(KERN_INFO
2935                                        "btrfsic_submit_bio(%s) with FLUSH but dummy block already in use (ignored)!\n",
2936                                        dev_state->name);
2937                 } else {
2938                         struct btrfsic_block *const block =
2939                                 &dev_state->dummy_block_for_bio_bh_flush;
2940
2941                         block->is_iodone = 0;
2942                         block->never_written = 0;
2943                         block->iodone_w_error = 0;
2944                         block->flush_gen = dev_state->last_flush_gen + 1;
2945                         block->submit_bio_bh_rw = bio->bi_opf;
2946                         block->orig_bio_bh_private = bio->bi_private;
2947                         block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2948                         block->next_in_same_bio = NULL;
2949                         bio->bi_private = block;
2950                         bio->bi_end_io = btrfsic_bio_end_io;
2951                 }
2952         }
2953 leave:
2954         mutex_unlock(&btrfsic_mutex);
2955 }
2956
/*
 * Run the integrity checker's bookkeeping on @bio via
 * __btrfsic_submit_bio(), then hand the bio to submit_bio().
 */
2957 void btrfsic_submit_bio(struct bio *bio)
2958 {
2959         __btrfsic_submit_bio(bio);
2960         submit_bio(bio);
2961 }
2962
/*
 * Run the integrity checker's bookkeeping on @bio via
 * __btrfsic_submit_bio(), then submit it with submit_bio_wait() and return
 * that function's result.
 */
int btrfsic_submit_bio_wait(struct bio *bio)
{
	int ret;

	__btrfsic_submit_bio(bio);
	ret = submit_bio_wait(bio);

	return ret;
}
2968
2969 int btrfsic_mount(struct btrfs_root *root,
2970                   struct btrfs_fs_devices *fs_devices,
2971                   int including_extent_data, u32 print_mask)
2972 {
2973         int ret;
2974         struct btrfsic_state *state;
2975         struct list_head *dev_head = &fs_devices->devices;
2976         struct btrfs_device *device;
2977
2978         if (root->nodesize & ((u64)PAGE_SIZE - 1)) {
2979                 printk(KERN_INFO
2980                        "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n",
2981                        root->nodesize, PAGE_SIZE);
2982                 return -1;
2983         }
2984         if (root->sectorsize & ((u64)PAGE_SIZE - 1)) {
2985                 printk(KERN_INFO
2986                        "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n",
2987                        root->sectorsize, PAGE_SIZE);
2988                 return -1;
2989         }
2990         state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
2991         if (!state) {
2992                 state = vzalloc(sizeof(*state));
2993                 if (!state) {
2994                         printk(KERN_INFO "btrfs check-integrity: vzalloc() failed!\n");
2995                         return -1;
2996                 }
2997         }
2998
2999         if (!btrfsic_is_initialized) {
3000                 mutex_init(&btrfsic_mutex);
3001                 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3002                 btrfsic_is_initialized = 1;
3003         }
3004         mutex_lock(&btrfsic_mutex);
3005         state->root = root;
3006         state->print_mask = print_mask;
3007         state->include_extent_data = including_extent_data;
3008         state->csum_size = 0;
3009         state->metablock_size = root->nodesize;
3010         state->datablock_size = root->sectorsize;
3011         INIT_LIST_HEAD(&state->all_blocks_list);
3012         btrfsic_block_hashtable_init(&state->block_hashtable);
3013         btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3014         state->max_superblock_generation = 0;
3015         state->latest_superblock = NULL;
3016
3017         list_for_each_entry(device, dev_head, dev_list) {
3018                 struct btrfsic_dev_state *ds;
3019                 const char *p;
3020
3021                 if (!device->bdev || !device->name)
3022                         continue;
3023
3024                 ds = btrfsic_dev_state_alloc();
3025                 if (NULL == ds) {
3026                         printk(KERN_INFO
3027                                "btrfs check-integrity: kmalloc() failed!\n");
3028                         mutex_unlock(&btrfsic_mutex);
3029                         return -1;
3030                 }
3031                 ds->bdev = device->bdev;
3032                 ds->state = state;
3033                 bdevname(ds->bdev, ds->name);
3034                 ds->name[BDEVNAME_SIZE - 1] = '\0';
3035                 p = kbasename(ds->name);
3036                 strlcpy(ds->name, p, sizeof(ds->name));
3037                 btrfsic_dev_state_hashtable_add(ds,
3038                                                 &btrfsic_dev_state_hashtable);
3039         }
3040
3041         ret = btrfsic_process_superblock(state, fs_devices);
3042         if (0 != ret) {
3043                 mutex_unlock(&btrfsic_mutex);
3044                 btrfsic_unmount(root, fs_devices);
3045                 return ret;
3046         }
3047
3048         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3049                 btrfsic_dump_database(state);
3050         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3051                 btrfsic_dump_tree(state);
3052
3053         mutex_unlock(&btrfsic_mutex);
3054         return 0;
3055 }
3056
3057 void btrfsic_unmount(struct btrfs_root *root,
3058                      struct btrfs_fs_devices *fs_devices)
3059 {
3060         struct btrfsic_block *b_all, *tmp_all;
3061         struct btrfsic_state *state;
3062         struct list_head *dev_head = &fs_devices->devices;
3063         struct btrfs_device *device;
3064
3065         if (!btrfsic_is_initialized)
3066                 return;
3067
3068         mutex_lock(&btrfsic_mutex);
3069
3070         state = NULL;
3071         list_for_each_entry(device, dev_head, dev_list) {
3072                 struct btrfsic_dev_state *ds;
3073
3074                 if (!device->bdev || !device->name)
3075                         continue;
3076
3077                 ds = btrfsic_dev_state_hashtable_lookup(
3078                                 device->bdev,
3079                                 &btrfsic_dev_state_hashtable);
3080                 if (NULL != ds) {
3081                         state = ds->state;
3082                         btrfsic_dev_state_hashtable_remove(ds);
3083                         btrfsic_dev_state_free(ds);
3084                 }
3085         }
3086
3087         if (NULL == state) {
3088                 printk(KERN_INFO
3089                        "btrfsic: error, cannot find state information on umount!\n");
3090                 mutex_unlock(&btrfsic_mutex);
3091                 return;
3092         }
3093
3094         /*
3095          * Don't care about keeping the lists' state up to date,
3096          * just free all memory that was allocated dynamically.
3097          * Free the blocks and the block_links.
3098          */
3099         list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list,
3100                                  all_blocks_node) {
3101                 struct btrfsic_block_link *l, *tmp;
3102
3103                 list_for_each_entry_safe(l, tmp, &b_all->ref_to_list,
3104                                          node_ref_to) {
3105                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3106                                 btrfsic_print_rem_link(state, l);
3107
3108                         l->ref_cnt--;
3109                         if (0 == l->ref_cnt)
3110                                 btrfsic_block_link_free(l);
3111                 }
3112
3113                 if (b_all->is_iodone || b_all->never_written)
3114                         btrfsic_block_free(b_all);
3115                 else
3116                         printk(KERN_INFO "btrfs: attempt to free %c-block @%llu (%s/%llu/%d) on umount which is not yet iodone!\n",
3117                                btrfsic_get_block_type(state, b_all),
3118                                b_all->logical_bytenr, b_all->dev_state->name,
3119                                b_all->dev_bytenr, b_all->mirror_num);
3120         }
3121
3122         mutex_unlock(&btrfsic_mutex);
3123
3124         kvfree(state);
3125 }