1 /*
2  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3  *
4  * bitmap_create  - sets up the bitmap structure
5  * bitmap_destroy - destroys the bitmap structure
6  *
7  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8  * - added disk storage for bitmap
9  * - changes to allow various bitmap chunk sizes
10  */
11
12 /*
13  * Still to do:
14  *
15  * flush after percent set rather than just time based. (maybe both).
16  */
17
18 #include <linux/blkdev.h>
19 #include <linux/module.h>
20 #include <linux/errno.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/timer.h>
24 #include <linux/sched.h>
25 #include <linux/list.h>
26 #include <linux/file.h>
27 #include <linux/mount.h>
28 #include <linux/buffer_head.h>
29 #include <linux/seq_file.h>
30 #include "md.h"
31 #include "bitmap.h"
32
33 static inline char *bmname(struct bitmap *bitmap)
34 {
35         return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
36 }
37
38 /*
39  * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
40  *
41  * 1) check to see if this page is allocated, if it's not then try to alloc
42  * 2) if the alloc fails, set the page's hijacked flag so we'll use the
43  *    page pointer directly as a counter
44  *
45  * if we find our page, we increment the page's refcount so that it stays
46  * allocated while we're using it
47  */
48 static int bitmap_checkpage(struct bitmap_counts *bitmap,
49                             unsigned long page, int create)
50 __releases(bitmap->lock)
51 __acquires(bitmap->lock)
52 {
53         unsigned char *mappage;
54
55         if (page >= bitmap->pages) {
56                 /* This can happen if bitmap_start_sync goes beyond
57                  * End-of-device while looking for a whole page.
58                  * It is harmless.
59                  */
60                 return -EINVAL;
61         }
62
63         if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
64                 return 0;
65
66         if (bitmap->bp[page].map) /* page is already allocated, just return */
67                 return 0;
68
69         if (!create)
70                 return -ENOENT;
71
72         /* this page has not been allocated yet */
73
74         spin_unlock_irq(&bitmap->lock);
75         /* It is possible that this is being called inside a
76          * prepare_to_wait/finish_wait loop from raid5.c:make_request().
77          * In general it is not permitted to sleep in that context as it
78          * can cause the loop to spin freely.
79          * That doesn't apply here as we can only reach this point
80          * once with any loop.
81          * When this function completes, either bp[page].map or
82          * bp[page].hijacked will be set.  In either case, this function will
83          * abort before getting to this point again.  So there is
84          * no risk of a free-spin, and so it is safe to assert
85          * that sleeping here is allowed.
86          */
87         sched_annotate_sleep();
88         mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
89         spin_lock_irq(&bitmap->lock);
90
91         if (mappage == NULL) {
92                 pr_debug("md/bitmap: map page allocation failed, hijacking\n");
93                 /* failed - set the hijacked flag so that we can use the
94                  * pointer as a counter */
95                 if (!bitmap->bp[page].map)
96                         bitmap->bp[page].hijacked = 1;
97         } else if (bitmap->bp[page].map ||
98                    bitmap->bp[page].hijacked) {
99                 /* somebody beat us to getting the page */
100                 kfree(mappage);
101                 return 0;
102         } else {
103
104                 /* no page was in place and we have one, so install it */
105
106                 bitmap->bp[page].map = mappage;
107                 bitmap->missing_pages--;
108         }
109         return 0;
110 }
111
112 /* if page is completely empty, put it back on the free list, or dealloc it */
113 /* if page was hijacked, unmark the flag so it might get alloced next time */
114 /* Note: lock should be held when calling this */
115 static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
116 {
117         char *ptr;
118
119         if (bitmap->bp[page].count) /* page is still busy */
120                 return;
121
122         /* page is no longer in use, it can be released */
123
124         if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
125                 bitmap->bp[page].hijacked = 0;
126                 bitmap->bp[page].map = NULL;
127         } else {
128                 /* normal case, free the page */
129                 ptr = bitmap->bp[page].map;
130                 bitmap->bp[page].map = NULL;
131                 bitmap->missing_pages++;
132                 kfree(ptr);
133         }
134 }
135
136 /*
137  * bitmap file handling - read and write the bitmap file and its superblock
138  */
139
140 /*
141  * basic page I/O operations
142  */
143
144 /* IO operations when bitmap is stored near all superblocks */
145 static int read_sb_page(struct mddev *mddev, loff_t offset,
146                         struct page *page,
147                         unsigned long index, int size)
148 {
149         /* choose a good rdev and read the page from there */
150
151         struct md_rdev *rdev;
152         sector_t target;
153
154         rdev_for_each(rdev, mddev) {
155                 if (! test_bit(In_sync, &rdev->flags)
156                     || test_bit(Faulty, &rdev->flags))
157                         continue;
158
159                 target = offset + index * (PAGE_SIZE/512);
160
161                 if (sync_page_io(rdev, target,
162                                  roundup(size, bdev_logical_block_size(rdev->bdev)),
163                                  page, READ, true)) {
164                         page->index = index;
165                         return 0;
166                 }
167         }
168         return -EIO;
169 }
170
171 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
172 {
173         /* Iterate the disks of an mddev, using rcu to protect access to the
174          * linked list, and raising the refcount of devices we return to ensure
175          * they don't disappear while in use.
176          * As devices are only added or removed when raid_disk is < 0 and
177          * nr_pending is 0 and In_sync is clear, the entries we return will
178          * still be in the same position on the list when we re-enter
179          * list_for_each_entry_continue_rcu.
180          */
181         rcu_read_lock();
182         if (rdev == NULL)
183                 /* start at the beginning */
184                 rdev = list_entry_rcu(&mddev->disks, struct md_rdev, same_set);
185         else {
186                 /* release the previous rdev and start from there. */
187                 rdev_dec_pending(rdev, mddev);
188         }
189         list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) {
190                 if (rdev->raid_disk >= 0 &&
191                     !test_bit(Faulty, &rdev->flags)) {
192                         /* this is a usable device */
193                         atomic_inc(&rdev->nr_pending);
194                         rcu_read_unlock();
195                         return rdev;
196                 }
197         }
198         rcu_read_unlock();
199         return NULL;
200 }
201
202 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
203 {
204         struct md_rdev *rdev = NULL;
205         struct block_device *bdev;
206         struct mddev *mddev = bitmap->mddev;
207         struct bitmap_storage *store = &bitmap->storage;
208         int node_offset = 0;
209
210         if (mddev_is_clustered(bitmap->mddev))
211                 node_offset = bitmap->cluster_slot * store->file_pages;
212
213         while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
214                 int size = PAGE_SIZE;
215                 loff_t offset = mddev->bitmap_info.offset;
216
217                 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
218
219                 if (page->index == store->file_pages-1) {
220                         int last_page_size = store->bytes & (PAGE_SIZE-1);
221                         if (last_page_size == 0)
222                                 last_page_size = PAGE_SIZE;
223                         size = roundup(last_page_size,
224                                        bdev_logical_block_size(bdev));
225                 }
226                 /* Just make sure we aren't corrupting data or
227                  * metadata
228                  */
229                 if (mddev->external) {
230                         /* Bitmap could be anywhere. */
231                         if (rdev->sb_start + offset + (page->index
232                                                        * (PAGE_SIZE/512))
233                             > rdev->data_offset
234                             &&
235                             rdev->sb_start + offset
236                             < (rdev->data_offset + mddev->dev_sectors
237                              + (PAGE_SIZE/512)))
238                                 goto bad_alignment;
239                 } else if (offset < 0) {
240                         /* DATA  BITMAP METADATA  */
241                         if (offset
242                             + (long)(page->index * (PAGE_SIZE/512))
243                             + size/512 > 0)
244                                 /* bitmap runs into metadata */
245                                 goto bad_alignment;
246                         if (rdev->data_offset + mddev->dev_sectors
247                             > rdev->sb_start + offset)
248                             /* data runs into bitmap */
249                                 goto bad_alignment;
250                 } else if (rdev->sb_start < rdev->data_offset) {
251                         /* METADATA BITMAP DATA */
252                         if (rdev->sb_start
253                             + offset
254                             + page->index*(PAGE_SIZE/512) + size/512
255                             > rdev->data_offset)
256                                 /* bitmap runs into data */
257                                 goto bad_alignment;
258                 } else {
259                         /* DATA METADATA BITMAP - no problems */
260                 }
261                 md_super_write(mddev, rdev,
262                                rdev->sb_start + offset
263                                + page->index * (PAGE_SIZE/512),
264                                size,
265                                page);
266         }
267
268         if (wait)
269                 md_super_wait(mddev);
270         return 0;
271
272  bad_alignment:
273         return -EINVAL;
274 }
275
276 static void bitmap_file_kick(struct bitmap *bitmap);
277 /*
278  * write out a page to a file
279  */
280 static void write_page(struct bitmap *bitmap, struct page *page, int wait)
281 {
282         struct buffer_head *bh;
283
284         if (bitmap->storage.file == NULL) {
285                 switch (write_sb_page(bitmap, page, wait)) {
286                 case -EINVAL:
287                         set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
288                 }
289         } else {
290
291                 bh = page_buffers(page);
292
293                 while (bh && bh->b_blocknr) {
294                         atomic_inc(&bitmap->pending_writes);
295                         set_buffer_locked(bh);
296                         set_buffer_mapped(bh);
297                         submit_bh(WRITE | REQ_SYNC, bh);
298                         bh = bh->b_this_page;
299                 }
300
301                 if (wait)
302                         wait_event(bitmap->write_wait,
303                                    atomic_read(&bitmap->pending_writes)==0);
304         }
305         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
306                 bitmap_file_kick(bitmap);
307 }
308
309 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
310 {
311         struct bitmap *bitmap = bh->b_private;
312
313         if (!uptodate)
314                 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
315         if (atomic_dec_and_test(&bitmap->pending_writes))
316                 wake_up(&bitmap->write_wait);
317 }
318
319 /* copied from buffer.c */
320 static void
321 __clear_page_buffers(struct page *page)
322 {
323         ClearPagePrivate(page);
324         set_page_private(page, 0);
325         page_cache_release(page);
326 }
327 static void free_buffers(struct page *page)
328 {
329         struct buffer_head *bh;
330
331         if (!PagePrivate(page))
332                 return;
333
334         bh = page_buffers(page);
335         while (bh) {
336                 struct buffer_head *next = bh->b_this_page;
337                 free_buffer_head(bh);
338                 bh = next;
339         }
340         __clear_page_buffers(page);
341         put_page(page);
342 }
343
344 /* read a page from a file.
345  * We both read the page, and attach buffers to the page to record the
346  * address of each block (using bmap).  These addresses will be used
347  * to write the block later, completely bypassing the filesystem.
348  * This usage is similar to how swap files are handled, and allows us
349  * to write to a file with no concerns of memory allocation failing.
350  */
351 static int read_page(struct file *file, unsigned long index,
352                      struct bitmap *bitmap,
353                      unsigned long count,
354                      struct page *page)
355 {
356         int ret = 0;
357         struct inode *inode = file_inode(file);
358         struct buffer_head *bh;
359         sector_t block;
360
361         pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
362                  (unsigned long long)index << PAGE_SHIFT);
363
364         bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
365         if (!bh) {
366                 ret = -ENOMEM;
367                 goto out;
368         }
369         attach_page_buffers(page, bh);
370         block = index << (PAGE_SHIFT - inode->i_blkbits);
371         while (bh) {
372                 if (count == 0)
373                         bh->b_blocknr = 0;
374                 else {
375                         bh->b_blocknr = bmap(inode, block);
376                         if (bh->b_blocknr == 0) {
377                                 /* Cannot use this file! */
378                                 ret = -EINVAL;
379                                 goto out;
380                         }
381                         bh->b_bdev = inode->i_sb->s_bdev;
382                         if (count < (1<<inode->i_blkbits))
383                                 count = 0;
384                         else
385                                 count -= (1<<inode->i_blkbits);
386
387                         bh->b_end_io = end_bitmap_write;
388                         bh->b_private = bitmap;
389                         atomic_inc(&bitmap->pending_writes);
390                         set_buffer_locked(bh);
391                         set_buffer_mapped(bh);
392                         submit_bh(READ, bh);
393                 }
394                 block++;
395                 bh = bh->b_this_page;
396         }
397         page->index = index;
398
399         wait_event(bitmap->write_wait,
400                    atomic_read(&bitmap->pending_writes)==0);
401         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
402                 ret = -EIO;
403 out:
404         if (ret)
405                 printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
406                         (int)PAGE_SIZE,
407                         (unsigned long long)index << PAGE_SHIFT,
408                         ret);
409         return ret;
410 }
411
412 /*
413  * bitmap file superblock operations
414  */
415
416 /* update the event counter and sync the superblock to disk */
417 void bitmap_update_sb(struct bitmap *bitmap)
418 {
419         bitmap_super_t *sb;
420
421         if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
422                 return;
423         if (bitmap->mddev->bitmap_info.external)
424                 return;
425         if (!bitmap->storage.sb_page) /* no superblock */
426                 return;
427         sb = kmap_atomic(bitmap->storage.sb_page);
428         sb->events = cpu_to_le64(bitmap->mddev->events);
429         if (bitmap->mddev->events < bitmap->events_cleared)
430                 /* rocking back to read-only */
431                 bitmap->events_cleared = bitmap->mddev->events;
432         sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
433         sb->state = cpu_to_le32(bitmap->flags);
434         /* Just in case these have been changed via sysfs: */
435         sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
436         sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
437         /* This might have been changed by a reshape */
438         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
439         sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
440         sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
441         sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
442                                            bitmap_info.space);
443         kunmap_atomic(sb);
444         write_page(bitmap, bitmap->storage.sb_page, 1);
445 }
446
447 /* print out the bitmap file superblock */
448 void bitmap_print_sb(struct bitmap *bitmap)
449 {
450         bitmap_super_t *sb;
451
452         if (!bitmap || !bitmap->storage.sb_page)
453                 return;
454         sb = kmap_atomic(bitmap->storage.sb_page);
455         printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
456         printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
457         printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
458         printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
459                                         *(__u32 *)(sb->uuid+0),
460                                         *(__u32 *)(sb->uuid+4),
461                                         *(__u32 *)(sb->uuid+8),
462                                         *(__u32 *)(sb->uuid+12));
463         printk(KERN_DEBUG "        events: %llu\n",
464                         (unsigned long long) le64_to_cpu(sb->events));
465         printk(KERN_DEBUG "events cleared: %llu\n",
466                         (unsigned long long) le64_to_cpu(sb->events_cleared));
467         printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
468         printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
469         printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
470         printk(KERN_DEBUG "     sync size: %llu KB\n",
471                         (unsigned long long)le64_to_cpu(sb->sync_size)/2);
472         printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
473         kunmap_atomic(sb);
474 }
475
476 /*
477  * bitmap_new_disk_sb
478  * @bitmap
479  *
480  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
481  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
482  * This function verifies 'bitmap_info' and populates the on-disk bitmap
483  * structure, which is to be written to disk.
484  *
485  * Returns: 0 on success, -Exxx on error
486  */
487 static int bitmap_new_disk_sb(struct bitmap *bitmap)
488 {
489         bitmap_super_t *sb;
490         unsigned long chunksize, daemon_sleep, write_behind;
491
492         bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
493         if (bitmap->storage.sb_page == NULL)
494                 return -ENOMEM;
495         bitmap->storage.sb_page->index = 0;
496
497         sb = kmap_atomic(bitmap->storage.sb_page);
498
499         sb->magic = cpu_to_le32(BITMAP_MAGIC);
500         sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
501
502         chunksize = bitmap->mddev->bitmap_info.chunksize;
503         BUG_ON(!chunksize);
504         if (!is_power_of_2(chunksize)) {
505                 kunmap_atomic(sb);
506                 printk(KERN_ERR "bitmap chunksize not a power of 2\n");
507                 return -EINVAL;
508         }
509         sb->chunksize = cpu_to_le32(chunksize);
510
511         daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
512         if (!daemon_sleep ||
513             (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
514                 printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
515                 daemon_sleep = 5 * HZ;
516         }
517         sb->daemon_sleep = cpu_to_le32(daemon_sleep);
518         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
519
520         /*
521          * FIXME: write_behind for RAID1.  If not specified, what
522          * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
523          */
524         write_behind = bitmap->mddev->bitmap_info.max_write_behind;
525         if (write_behind > COUNTER_MAX)
526                 write_behind = COUNTER_MAX / 2;
527         sb->write_behind = cpu_to_le32(write_behind);
528         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
529
530         /* keep the array size field of the bitmap superblock up to date */
531         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
532
533         memcpy(sb->uuid, bitmap->mddev->uuid, 16);
534
535         set_bit(BITMAP_STALE, &bitmap->flags);
536         sb->state = cpu_to_le32(bitmap->flags);
537         bitmap->events_cleared = bitmap->mddev->events;
538         sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
539
540         kunmap_atomic(sb);
541
542         return 0;
543 }
544
545 /* read the superblock from the bitmap file and initialize some bitmap fields */
546 static int bitmap_read_sb(struct bitmap *bitmap)
547 {
548         char *reason = NULL;
549         bitmap_super_t *sb;
550         unsigned long chunksize, daemon_sleep, write_behind;
551         unsigned long long events;
552         int nodes = 0;
553         unsigned long sectors_reserved = 0;
554         int err = -EINVAL;
555         struct page *sb_page;
556
557         if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
558                 chunksize = 128 * 1024 * 1024;
559                 daemon_sleep = 5 * HZ;
560                 write_behind = 0;
561                 set_bit(BITMAP_STALE, &bitmap->flags);
562                 err = 0;
563                 goto out_no_sb;
564         }
565         /* page 0 is the superblock, read it... */
566         sb_page = alloc_page(GFP_KERNEL);
567         if (!sb_page)
568                 return -ENOMEM;
569         bitmap->storage.sb_page = sb_page;
570
571 re_read:
572         /* If cluster_slot is set, the cluster is set up */
573         if (bitmap->cluster_slot >= 0) {
574                 sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
575
576                 sector_div(bm_blocks,
577                            bitmap->mddev->bitmap_info.chunksize >> 9);
578                 /* bits to bytes */
579                 bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
580                 /* to 4k blocks */
581                 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
582                 bitmap->mddev->bitmap_info.offset += bitmap->cluster_slot * (bm_blocks << 3);
583                 pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
584                         bitmap->cluster_slot, (unsigned long long)bitmap->mddev->bitmap_info.offset);
585         }
586
587         if (bitmap->storage.file) {
588                 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
589                 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
590
591                 err = read_page(bitmap->storage.file, 0,
592                                 bitmap, bytes, sb_page);
593         } else {
594                 err = read_sb_page(bitmap->mddev,
595                                    bitmap->mddev->bitmap_info.offset,
596                                    sb_page,
597                                    0, sizeof(bitmap_super_t));
598         }
599         if (err)
600                 return err;
601
602         err = -EINVAL;
603         sb = kmap_atomic(sb_page);
604
605         chunksize = le32_to_cpu(sb->chunksize);
606         daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
607         write_behind = le32_to_cpu(sb->write_behind);
608         sectors_reserved = le32_to_cpu(sb->sectors_reserved);
609         nodes = le32_to_cpu(sb->nodes);
610         strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
611
612         /* verify that the bitmap-specific fields are valid */
613         if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
614                 reason = "bad magic";
615         else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
616                  le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
617                 reason = "unrecognized superblock version";
618         else if (chunksize < 512)
619                 reason = "bitmap chunksize too small";
620         else if (!is_power_of_2(chunksize))
621                 reason = "bitmap chunksize not a power of 2";
622         else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
623                 reason = "daemon sleep period out of range";
624         else if (write_behind > COUNTER_MAX)
625                 reason = "write-behind limit out of range (0 - 16383)";
626         if (reason) {
627                 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
628                         bmname(bitmap), reason);
629                 goto out;
630         }
631
632         /* keep the array size field of the bitmap superblock up to date */
633         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
634
635         if (bitmap->mddev->persistent) {
636                 /*
637                  * We have a persistent array superblock, so compare the
638                  * bitmap's UUID and event counter to the mddev's
639                  */
640                 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
641                         printk(KERN_INFO
642                                "%s: bitmap superblock UUID mismatch\n",
643                                bmname(bitmap));
644                         goto out;
645                 }
646                 events = le64_to_cpu(sb->events);
647                 if (!nodes && (events < bitmap->mddev->events)) {
648                         printk(KERN_INFO
649                                "%s: bitmap file is out of date (%llu < %llu) "
650                                "-- forcing full recovery\n",
651                                bmname(bitmap), events,
652                                (unsigned long long) bitmap->mddev->events);
653                         set_bit(BITMAP_STALE, &bitmap->flags);
654                 }
655         }
656
657         /* assign fields using values from superblock */
658         bitmap->flags |= le32_to_cpu(sb->state);
659         if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
660                 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
661         bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
662         strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
663         err = 0;
664
665 out:
666         kunmap_atomic(sb);
667         /* Assigning chunksize is required for "re_read" */
668         bitmap->mddev->bitmap_info.chunksize = chunksize;
669         if (nodes && (bitmap->cluster_slot < 0)) {
670                 err = md_setup_cluster(bitmap->mddev, nodes);
671                 if (err) {
672                         pr_err("%s: Could not setup cluster service (%d)\n",
673                                         bmname(bitmap), err);
674                         goto out_no_sb;
675                 }
676                 bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
677                 goto re_read;
678         }
679
680
681 out_no_sb:
682         if (test_bit(BITMAP_STALE, &bitmap->flags))
683                 bitmap->events_cleared = bitmap->mddev->events;
684         bitmap->mddev->bitmap_info.chunksize = chunksize;
685         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
686         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
687         bitmap->mddev->bitmap_info.nodes = nodes;
688         if (bitmap->mddev->bitmap_info.space == 0 ||
689             bitmap->mddev->bitmap_info.space > sectors_reserved)
690                 bitmap->mddev->bitmap_info.space = sectors_reserved;
691         if (err) {
692                 bitmap_print_sb(bitmap);
693                 if (bitmap->cluster_slot < 0)
694                         md_cluster_stop(bitmap->mddev);
695         }
696         return err;
697 }
698
699 /*
700  * general bitmap file operations
701  */
702
703 /*
704  * on-disk bitmap:
705  *
706  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
707  * file a page at a time. There's a superblock at the start of the file.
708  */
709 /* calculate the index of the page that contains this bit */
710 static inline unsigned long file_page_index(struct bitmap_storage *store,
711                                             unsigned long chunk)
712 {
713         if (store->sb_page)
714                 chunk += sizeof(bitmap_super_t) << 3;
715         return chunk >> PAGE_BIT_SHIFT;
716 }
717
718 /* calculate the (bit) offset of this bit within a page */
719 static inline unsigned long file_page_offset(struct bitmap_storage *store,
720                                              unsigned long chunk)
721 {
722         if (store->sb_page)
723                 chunk += sizeof(bitmap_super_t) << 3;
724         return chunk & (PAGE_BITS - 1);
725 }
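/*
 * Worked example (illustrative; assumes 4096-byte pages, so PAGE_BITS is
 * 32768, and a 256-byte bitmap_super_t, i.e. 2048 superblock bits):
 * chunk 31000 in storage that carries the superblock maps to bit 33048,
 * i.e. page 1 (33048 >> PAGE_BIT_SHIFT) at bit offset 280 within that page.
 */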
726
727 /*
728  * return a pointer to the page in the filemap that contains the given bit
729  *
730  */
731 static inline struct page *filemap_get_page(struct bitmap_storage *store,
732                                             unsigned long chunk)
733 {
734         if (file_page_index(store, chunk) >= store->file_pages)
735                 return NULL;
736         return store->filemap[file_page_index(store, chunk)];
737 }
738
739 static int bitmap_storage_alloc(struct bitmap_storage *store,
740                                 unsigned long chunks, int with_super,
741                                 int slot_number)
742 {
743         int pnum, offset = 0;
744         unsigned long num_pages;
745         unsigned long bytes;
746
747         bytes = DIV_ROUND_UP(chunks, 8);
748         if (with_super)
749                 bytes += sizeof(bitmap_super_t);
750
751         num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
752         offset = slot_number * (num_pages - 1);
753
754         store->filemap = kmalloc(sizeof(struct page *)
755                                  * num_pages, GFP_KERNEL);
756         if (!store->filemap)
757                 return -ENOMEM;
758
759         if (with_super && !store->sb_page) {
760                 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
761                 if (store->sb_page == NULL)
762                         return -ENOMEM;
763         }
764
765         pnum = 0;
766         if (store->sb_page) {
767                 store->filemap[0] = store->sb_page;
768                 pnum = 1;
769                 store->sb_page->index = offset;
770         }
771
772         for ( ; pnum < num_pages; pnum++) {
773                 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
774                 if (!store->filemap[pnum]) {
775                         store->file_pages = pnum;
776                         return -ENOMEM;
777                 }
778                 store->filemap[pnum]->index = pnum + offset;
779         }
780         store->file_pages = pnum;
781
782         /* We need 4 bits per page, rounded up to a multiple
783          * of sizeof(unsigned long) */
784         store->filemap_attr = kzalloc(
785                 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
786                 GFP_KERNEL);
787         if (!store->filemap_attr)
788                 return -ENOMEM;
789
790         store->bytes = bytes;
791
792         return 0;
793 }
794
795 static void bitmap_file_unmap(struct bitmap_storage *store)
796 {
797         struct page **map, *sb_page;
798         int pages;
799         struct file *file;
800
801         file = store->file;
802         map = store->filemap;
803         pages = store->file_pages;
804         sb_page = store->sb_page;
805
806         while (pages--)
807                 if (map[pages] != sb_page) /* 0 is sb_page, release it below */
808                         free_buffers(map[pages]);
809         kfree(map);
810         kfree(store->filemap_attr);
811
812         if (sb_page)
813                 free_buffers(sb_page);
814
815         if (file) {
816                 struct inode *inode = file_inode(file);
817                 invalidate_mapping_pages(inode->i_mapping, 0, -1);
818                 fput(file);
819         }
820 }
821
822 /*
823  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
824  * then it is no longer reliable, so we stop using it and we mark the file
825  * as failed in the superblock
826  */
827 static void bitmap_file_kick(struct bitmap *bitmap)
828 {
829         char *path, *ptr = NULL;
830
831         if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
832                 bitmap_update_sb(bitmap);
833
834                 if (bitmap->storage.file) {
835                         path = kmalloc(PAGE_SIZE, GFP_KERNEL);
836                         if (path)
837                                 ptr = d_path(&bitmap->storage.file->f_path,
838                                              path, PAGE_SIZE);
839
840                         printk(KERN_ALERT
841                               "%s: kicking failed bitmap file %s from array!\n",
842                               bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
843
844                         kfree(path);
845                 } else
846                         printk(KERN_ALERT
847                                "%s: disabling internal bitmap due to errors\n",
848                                bmname(bitmap));
849         }
850 }
851
852 enum bitmap_page_attr {
853         BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
854         BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
855                                     * i.e. counter is 1 or 2. */
856         BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
857 };
858
859 static inline void set_page_attr(struct bitmap *bitmap, int pnum,
860                                  enum bitmap_page_attr attr)
861 {
862         set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
863 }
864
865 static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
866                                    enum bitmap_page_attr attr)
867 {
868         clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
869 }
870
871 static inline int test_page_attr(struct bitmap *bitmap, int pnum,
872                                  enum bitmap_page_attr attr)
873 {
874         return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
875 }
876
877 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
878                                            enum bitmap_page_attr attr)
879 {
880         return test_and_clear_bit((pnum<<2) + attr,
881                                   bitmap->storage.filemap_attr);
882 }
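/*
 * Each filemap page gets four attribute bits in filemap_attr (hence the
 * "pnum << 2" indexing above): DIRTY, PENDING and NEEDWRITE from
 * enum bitmap_page_attr, with the fourth bit left spare.
 */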
883 /*
884  * bitmap_file_set_bit -- called before performing a write to the md device
885  * to set (and eventually sync) a particular bit in the bitmap file
886  *
887  * we set the bit immediately, then we record the page number so that
888  * when an unplug occurs, we can flush the dirty pages out to disk
889  */
890 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
891 {
892         unsigned long bit;
893         struct page *page;
894         void *kaddr;
895         unsigned long chunk = block >> bitmap->counts.chunkshift;
896
897         page = filemap_get_page(&bitmap->storage, chunk);
898         if (!page)
899                 return;
900         bit = file_page_offset(&bitmap->storage, chunk);
901
902         /* set the bit */
903         kaddr = kmap_atomic(page);
904         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
905                 set_bit(bit, kaddr);
906         else
907                 set_bit_le(bit, kaddr);
908         kunmap_atomic(kaddr);
909         pr_debug("set file bit %lu page %lu\n", bit, page->index);
910         /* record page number so it gets flushed to disk when unplug occurs */
911         set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
912 }
913
914 static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
915 {
916         unsigned long bit;
917         struct page *page;
918         void *paddr;
919         unsigned long chunk = block >> bitmap->counts.chunkshift;
920
921         page = filemap_get_page(&bitmap->storage, chunk);
922         if (!page)
923                 return;
924         bit = file_page_offset(&bitmap->storage, chunk);
925         paddr = kmap_atomic(page);
926         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
927                 clear_bit(bit, paddr);
928         else
929                 clear_bit_le(bit, paddr);
930         kunmap_atomic(paddr);
931         if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
932                 set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
933                 bitmap->allclean = 0;
934         }
935 }
936
937 static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
938 {
939         unsigned long bit;
940         struct page *page;
941         void *paddr;
942         unsigned long chunk = block >> bitmap->counts.chunkshift;
943         int set = 0;
944
945         page = filemap_get_page(&bitmap->storage, chunk);
946         if (!page)
947                 return -EINVAL;
948         bit = file_page_offset(&bitmap->storage, chunk);
949         paddr = kmap_atomic(page);
950         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
951                 set = test_bit(bit, paddr);
952         else
953                 set = test_bit_le(bit, paddr);
954         kunmap_atomic(paddr);
955         return set;
956 }
957
958
959 /* this gets called when the md device is ready to unplug its underlying
960  * (slave) device queues -- before we let any writes go down, we need to
961  * sync the dirty pages of the bitmap file to disk */
962 void bitmap_unplug(struct bitmap *bitmap)
963 {
964         unsigned long i;
965         int dirty, need_write;
966
967         if (!bitmap || !bitmap->storage.filemap ||
968             test_bit(BITMAP_STALE, &bitmap->flags))
969                 return;
970
971         /* look at each page to see if there are any set bits that need to be
972          * flushed out to disk */
973         for (i = 0; i < bitmap->storage.file_pages; i++) {
974                 if (!bitmap->storage.filemap)
975                         return;
976                 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
977                 need_write = test_and_clear_page_attr(bitmap, i,
978                                                       BITMAP_PAGE_NEEDWRITE);
979                 if (dirty || need_write) {
980                         clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
981                         write_page(bitmap, bitmap->storage.filemap[i], 0);
982                 }
983         }
984         if (bitmap->storage.file)
985                 wait_event(bitmap->write_wait,
986                            atomic_read(&bitmap->pending_writes)==0);
987         else
988                 md_super_wait(bitmap->mddev);
989
990         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
991                 bitmap_file_kick(bitmap);
992 }
993 EXPORT_SYMBOL(bitmap_unplug);
994
995 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
996 /* bitmap_init_from_disk -- called at bitmap_create time to initialize
997  * the in-memory bitmap from the on-disk bitmap -- also, sets up the
998  * memory mapping of the bitmap file
999  * Special cases:
1000  *   if there's no bitmap file, or if the bitmap file had been
1001  *   previously kicked from the array, we mark all the bits as
1002  *   1's in order to cause a full resync.
1003  *
1004  * We ignore all bits for sectors that end earlier than 'start'.
1005  * This is used when reading an out-of-date bitmap...
1006  */
1007 static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1008 {
1009         unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
1010         struct page *page = NULL;
1011         unsigned long bit_cnt = 0;
1012         struct file *file;
1013         unsigned long offset;
1014         int outofdate;
1015         int ret = -ENOSPC;
1016         void *paddr;
1017         struct bitmap_storage *store = &bitmap->storage;
1018
1019         chunks = bitmap->counts.chunks;
1020         file = store->file;
1021
1022         if (!file && !bitmap->mddev->bitmap_info.offset) {
1023                 /* No permanent bitmap - fill with '1s'. */
1024                 store->filemap = NULL;
1025                 store->file_pages = 0;
1026                 for (i = 0; i < chunks ; i++) {
1027                         /* if the disk bit is set, set the memory bit */
1028                         int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
1029                                       >= start);
1030                         bitmap_set_memory_bits(bitmap,
1031                                                (sector_t)i << bitmap->counts.chunkshift,
1032                                                needed);
1033                 }
1034                 return 0;
1035         }
1036
1037         outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
1038         if (outofdate)
1039                 printk(KERN_INFO "%s: bitmap file is out of date, doing full "
1040                         "recovery\n", bmname(bitmap));
1041
1042         if (file && i_size_read(file->f_mapping->host) < store->bytes) {
1043                 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
1044                        bmname(bitmap),
1045                        (unsigned long) i_size_read(file->f_mapping->host),
1046                        store->bytes);
1047                 goto err;
1048         }
1049
1050         oldindex = ~0L;
1051         offset = 0;
1052         if (!bitmap->mddev->bitmap_info.external)
1053                 offset = sizeof(bitmap_super_t);
1054
1055         if (mddev_is_clustered(bitmap->mddev))
1056                 node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
1057
1058         for (i = 0; i < chunks; i++) {
1059                 int b;
1060                 index = file_page_index(&bitmap->storage, i);
1061                 bit = file_page_offset(&bitmap->storage, i);
1062                 if (index != oldindex) { /* this is a new page, read it in */
1063                         int count;
1064                         /* unmap the old page, we're done with it */
1065                         if (index == store->file_pages-1)
1066                                 count = store->bytes - index * PAGE_SIZE;
1067                         else
1068                                 count = PAGE_SIZE;
1069                         page = store->filemap[index];
1070                         if (file)
1071                                 ret = read_page(file, index, bitmap,
1072                                                 count, page);
1073                         else
1074                                 ret = read_sb_page(
1075                                         bitmap->mddev,
1076                                         bitmap->mddev->bitmap_info.offset,
1077                                         page,
1078                                         index + node_offset, count);
1079
1080                         if (ret)
1081                                 goto err;
1082
1083                         oldindex = index;
1084
1085                         if (outofdate) {
1086                                 /*
1087                                  * if bitmap is out of date, dirty the
1088                                  * whole page and write it out
1089                                  */
1090                                 paddr = kmap_atomic(page);
1091                                 memset(paddr + offset, 0xff,
1092                                        PAGE_SIZE - offset);
1093                                 kunmap_atomic(paddr);
1094                                 write_page(bitmap, page, 1);
1095
1096                                 ret = -EIO;
1097                                 if (test_bit(BITMAP_WRITE_ERROR,
1098                                              &bitmap->flags))
1099                                         goto err;
1100                         }
1101                 }
1102                 paddr = kmap_atomic(page);
1103                 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1104                         b = test_bit(bit, paddr);
1105                 else
1106                         b = test_bit_le(bit, paddr);
1107                 kunmap_atomic(paddr);
1108                 if (b) {
1109                         /* if the disk bit is set, set the memory bit */
1110                         int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1111                                       >= start);
1112                         bitmap_set_memory_bits(bitmap,
1113                                                (sector_t)i << bitmap->counts.chunkshift,
1114                                                needed);
1115                         bit_cnt++;
1116                 }
1117                 offset = 0;
1118         }
1119
1120         printk(KERN_INFO "%s: bitmap initialized from disk: "
1121                "read %lu pages, set %lu of %lu bits\n",
1122                bmname(bitmap), store->file_pages,
1123                bit_cnt, chunks);
1124
1125         return 0;
1126
1127  err:
1128         printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
1129                bmname(bitmap), ret);
1130         return ret;
1131 }
1132
1133 void bitmap_write_all(struct bitmap *bitmap)
1134 {
1135         /* We don't actually write all bitmap blocks here,
1136          * just flag them as needing to be written
1137          */
1138         int i;
1139
1140         if (!bitmap || !bitmap->storage.filemap)
1141                 return;
1142         if (bitmap->storage.file)
1143                 /* Only one copy, so nothing needed */
1144                 return;
1145
1146         for (i = 0; i < bitmap->storage.file_pages; i++)
1147                 set_page_attr(bitmap, i,
1148                               BITMAP_PAGE_NEEDWRITE);
1149         bitmap->allclean = 0;
1150 }
1151
1152 static void bitmap_count_page(struct bitmap_counts *bitmap,
1153                               sector_t offset, int inc)
1154 {
1155         sector_t chunk = offset >> bitmap->chunkshift;
1156         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1157         bitmap->bp[page].count += inc;
1158         bitmap_checkfree(bitmap, page);
1159 }
1160
1161 static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1162 {
1163         sector_t chunk = offset >> bitmap->chunkshift;
1164         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1165         struct bitmap_page *bp = &bitmap->bp[page];
1166
1167         if (!bp->pending)
1168                 bp->pending = 1;
1169 }
1170
1171 static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1172                                             sector_t offset, sector_t *blocks,
1173                                             int create);
1174
1175 /*
1176  * bitmap daemon -- periodically wakes up to clean bits and flush pages
1177  *                      out to disk
1178  */
1179
1180 void bitmap_daemon_work(struct mddev *mddev)
1181 {
1182         struct bitmap *bitmap;
1183         unsigned long j;
1184         unsigned long nextpage;
1185         sector_t blocks;
1186         struct bitmap_counts *counts;
1187
1188         /* Use a mutex to guard daemon_work against
1189          * bitmap_destroy.
1190          */
1191         mutex_lock(&mddev->bitmap_info.mutex);
1192         bitmap = mddev->bitmap;
1193         if (bitmap == NULL) {
1194                 mutex_unlock(&mddev->bitmap_info.mutex);
1195                 return;
1196         }
1197         if (time_before(jiffies, bitmap->daemon_lastrun
1198                         + mddev->bitmap_info.daemon_sleep))
1199                 goto done;
1200
1201         bitmap->daemon_lastrun = jiffies;
1202         if (bitmap->allclean) {
1203                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1204                 goto done;
1205         }
1206         bitmap->allclean = 1;
1207
1208         /* Any file-page which is PENDING now needs to be written.
1209          * So set NEEDWRITE now, then after we make any last-minute changes
1210          * we will write it.
1211          */
1212         for (j = 0; j < bitmap->storage.file_pages; j++)
1213                 if (test_and_clear_page_attr(bitmap, j,
1214                                              BITMAP_PAGE_PENDING))
1215                         set_page_attr(bitmap, j,
1216                                       BITMAP_PAGE_NEEDWRITE);
1217
1218         if (bitmap->need_sync &&
1219             mddev->bitmap_info.external == 0) {
1220                 /* Arrange for superblock update as well as
1221                  * other changes */
1222                 bitmap_super_t *sb;
1223                 bitmap->need_sync = 0;
1224                 if (bitmap->storage.filemap) {
1225                         sb = kmap_atomic(bitmap->storage.sb_page);
1226                         sb->events_cleared =
1227                                 cpu_to_le64(bitmap->events_cleared);
1228                         kunmap_atomic(sb);
1229                         set_page_attr(bitmap, 0,
1230                                       BITMAP_PAGE_NEEDWRITE);
1231                 }
1232         }
1233         /* Now look at the bitmap counters and if any are '2' or '1',
1234          * decrement and handle accordingly.
1235          */
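        /*
         * Counter semantics relied on below (the exact flag/count layout
         * is defined in bitmap.h): while writes are in flight the count
         * stays above 2; once they complete it decays 2 -> 1 -> 0 over
         * successive daemon passes, and in the common case the on-disk
         * bit is cleared when the count finally drops to 0.
         */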
1236         counts = &bitmap->counts;
1237         spin_lock_irq(&counts->lock);
1238         nextpage = 0;
1239         for (j = 0; j < counts->chunks; j++) {
1240                 bitmap_counter_t *bmc;
1241                 sector_t  block = (sector_t)j << counts->chunkshift;
1242
1243                 if (j == nextpage) {
1244                         nextpage += PAGE_COUNTER_RATIO;
1245                         if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1246                                 j |= PAGE_COUNTER_MASK;
1247                                 continue;
1248                         }
1249                         counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1250                 }
1251                 bmc = bitmap_get_counter(counts,
1252                                          block,
1253                                          &blocks, 0);
1254
1255                 if (!bmc) {
1256                         j |= PAGE_COUNTER_MASK;
1257                         continue;
1258                 }
1259                 if (*bmc == 1 && !bitmap->need_sync) {
1260                         /* We can clear the bit */
1261                         *bmc = 0;
1262                         bitmap_count_page(counts, block, -1);
1263                         bitmap_file_clear_bit(bitmap, block);
1264                 } else if (*bmc && *bmc <= 2) {
1265                         *bmc = 1;
1266                         bitmap_set_pending(counts, block);
1267                         bitmap->allclean = 0;
1268                 }
1269         }
1270         spin_unlock_irq(&counts->lock);
1271
1272         /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
1273          * DIRTY pages need to be written by bitmap_unplug so it can wait
1274          * for them.
1275          * If we find any DIRTY page we stop there and let bitmap_unplug
1276          * handle all the rest.  This is important in the case where
1277          * the first blocking holds the superblock and it has been updated.
1278          * We mustn't write any other blocks before the superblock.
1279          */
1280         for (j = 0;
1281              j < bitmap->storage.file_pages
1282                      && !test_bit(BITMAP_STALE, &bitmap->flags);
1283              j++) {
1284                 if (test_page_attr(bitmap, j,
1285                                    BITMAP_PAGE_DIRTY))
1286                         /* bitmap_unplug will handle the rest */
1287                         break;
1288                 if (test_and_clear_page_attr(bitmap, j,
1289                                              BITMAP_PAGE_NEEDWRITE)) {
1290                         write_page(bitmap, bitmap->storage.filemap[j], 0);
1291                 }
1292         }
1293
1294  done:
1295         if (bitmap->allclean == 0)
1296                 mddev->thread->timeout =
1297                         mddev->bitmap_info.daemon_sleep;
1298         mutex_unlock(&mddev->bitmap_info.mutex);
1299 }
1300
1301 static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1302                                             sector_t offset, sector_t *blocks,
1303                                             int create)
1304 __releases(bitmap->lock)
1305 __acquires(bitmap->lock)
1306 {
1307         /* If 'create', we might release the lock and reclaim it.
1308          * The lock must have been taken with interrupts enabled.
1309          * If !create, we don't release the lock.
1310          */
1311         sector_t chunk = offset >> bitmap->chunkshift;
1312         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1313         unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1314         sector_t csize;
1315         int err;
1316
1317         err = bitmap_checkpage(bitmap, page, create);
1318
1319         if (bitmap->bp[page].hijacked ||
1320             bitmap->bp[page].map == NULL)
1321                 csize = ((sector_t)1) << (bitmap->chunkshift +
1322                                           PAGE_COUNTER_SHIFT - 1);
1323         else
1324                 csize = ((sector_t)1) << bitmap->chunkshift;
1325         *blocks = csize - (offset & (csize - 1));
1326
1327         if (err < 0)
1328                 return NULL;
1329
1330         /* now locked ... */
1331
1332         if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1333                 /* should we use the first or second counter field
1334                  * of the hijacked pointer? */
1335                 int hi = (pageoff > PAGE_COUNTER_MASK);
1336                 return  &((bitmap_counter_t *)
1337                           &bitmap->bp[page].map)[hi];
1338         } else /* page is allocated */
1339                 return (bitmap_counter_t *)
1340                         &(bitmap->bp[page].map[pageoff]);
1341 }
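/*
 * Note on the hijacked case above: when no counter page could be
 * allocated, the bp[page].map pointer field itself is reused to hold two
 * bitmap_counter_t values, each covering one half of the page's range,
 * which is why a larger csize is reported for hijacked (and unallocated)
 * pages.
 */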
1342
1343 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
1344 {
1345         if (!bitmap)
1346                 return 0;
1347
1348         if (behind) {
1349                 int bw;
1350                 atomic_inc(&bitmap->behind_writes);
1351                 bw = atomic_read(&bitmap->behind_writes);
1352                 if (bw > bitmap->behind_writes_used)
1353                         bitmap->behind_writes_used = bw;
1354
1355                 pr_debug("inc write-behind count %d/%lu\n",
1356                          bw, bitmap->mddev->bitmap_info.max_write_behind);
1357         }
1358
1359         while (sectors) {
1360                 sector_t blocks;
1361                 bitmap_counter_t *bmc;
1362
1363                 spin_lock_irq(&bitmap->counts.lock);
1364                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
1365                 if (!bmc) {
1366                         spin_unlock_irq(&bitmap->counts.lock);
1367                         return 0;
1368                 }
1369
1370                 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
1371                         DEFINE_WAIT(__wait);
1372                         /* note that it is safe to do the prepare_to_wait
1373                          * after the test as long as we do it before dropping
1374                          * the spinlock.
1375                          */
1376                         prepare_to_wait(&bitmap->overflow_wait, &__wait,
1377                                         TASK_UNINTERRUPTIBLE);
1378                         spin_unlock_irq(&bitmap->counts.lock);
1379                         schedule();
1380                         finish_wait(&bitmap->overflow_wait, &__wait);
1381                         continue;
1382                 }
1383
1384                 switch (*bmc) {
1385                 case 0:
1386                         bitmap_file_set_bit(bitmap, offset);
1387                         bitmap_count_page(&bitmap->counts, offset, 1);
1388                         /* fall through */
1389                 case 1:
1390                         *bmc = 2;
1391                 }
1392
1393                 (*bmc)++;
1394
1395                 spin_unlock_irq(&bitmap->counts.lock);
1396
1397                 offset += blocks;
1398                 if (sectors > blocks)
1399                         sectors -= blocks;
1400                 else
1401                         sectors = 0;
1402         }
1403         return 0;
1404 }
1405 EXPORT_SYMBOL(bitmap_startwrite);
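
/*
 * A minimal sketch of the expected caller pattern (the real call sites live
 * in the RAID personalities, e.g. the raid1/raid5 make_request paths; the
 * variable names below are illustrative only):
 *
 *	bitmap_startwrite(mddev->bitmap, bio_sector, nr_sectors, behind);
 *	... submit the writes to the member devices ...
 *	bitmap_endwrite(mddev->bitmap, bio_sector, nr_sectors,
 *			write_succeeded, behind);
 */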
1406
1407 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1408                      int success, int behind)
1409 {
1410         if (!bitmap)
1411                 return;
1412         if (behind) {
1413                 if (atomic_dec_and_test(&bitmap->behind_writes))
1414                         wake_up(&bitmap->behind_wait);
1415                 pr_debug("dec write-behind count %d/%lu\n",
1416                          atomic_read(&bitmap->behind_writes),
1417                          bitmap->mddev->bitmap_info.max_write_behind);
1418         }
1419
1420         while (sectors) {
1421                 sector_t blocks;
1422                 unsigned long flags;
1423                 bitmap_counter_t *bmc;
1424
1425                 spin_lock_irqsave(&bitmap->counts.lock, flags);
1426                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
1427                 if (!bmc) {
1428                         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1429                         return;
1430                 }
1431
1432                 if (success && !bitmap->mddev->degraded &&
1433                     bitmap->events_cleared < bitmap->mddev->events) {
1434                         bitmap->events_cleared = bitmap->mddev->events;
1435                         bitmap->need_sync = 1;
1436                         sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1437                 }
1438
1439                 if (!success && !NEEDED(*bmc))
1440                         *bmc |= NEEDED_MASK;
1441
1442                 if (COUNTER(*bmc) == COUNTER_MAX)
1443                         wake_up(&bitmap->overflow_wait);
1444
1445                 (*bmc)--;
1446                 if (*bmc <= 2) {
1447                         bitmap_set_pending(&bitmap->counts, offset);
1448                         bitmap->allclean = 0;
1449                 }
1450                 spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1451                 offset += blocks;
1452                 if (sectors > blocks)
1453                         sectors -= blocks;
1454                 else
1455                         sectors = 0;
1456         }
1457 }
1458 EXPORT_SYMBOL(bitmap_endwrite);
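
/*
 * Note on the overflow handshake visible above: bitmap_startwrite() sleeps
 * on overflow_wait when a chunk counter has reached COUNTER_MAX, and
 * bitmap_endwrite() issues the matching wake_up() just before decrementing
 * such a counter, so writers throttle instead of overflowing the counter.
 */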
1459
1460 static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1461                                int degraded)
1462 {
1463         bitmap_counter_t *bmc;
1464         int rv;
1465         if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1466                 *blocks = 1024;
1467                 return 1; /* always resync if no bitmap */
1468         }
1469         spin_lock_irq(&bitmap->counts.lock);
1470         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1471         rv = 0;
1472         if (bmc) {
1473                 /* locked */
1474                 if (RESYNC(*bmc))
1475                         rv = 1;
1476                 else if (NEEDED(*bmc)) {
1477                         rv = 1;
1478                         if (!degraded) { /* don't set/clear bits if degraded */
1479                                 *bmc |= RESYNC_MASK;
1480                                 *bmc &= ~NEEDED_MASK;
1481                         }
1482                 }
1483         }
1484         spin_unlock_irq(&bitmap->counts.lock);
1485         return rv;
1486 }
1487
1488 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1489                       int degraded)
1490 {
1491         /* bitmap_start_sync must always report on multiples of whole
1492          * pages, otherwise resync (which is very PAGE_SIZE based) will
1493          * get confused.
1494          * So call __bitmap_start_sync repeatedly (if needed) until
1495          * at least PAGE_SIZE>>9 blocks are covered.
1496          * Return the 'or' of the results.
1497          */
1498         int rv = 0;
1499         sector_t blocks1;
1500
1501         *blocks = 0;
1502         while (*blocks < (PAGE_SIZE>>9)) {
1503                 rv |= __bitmap_start_sync(bitmap, offset,
1504                                           &blocks1, degraded);
1505                 offset += blocks1;
1506                 *blocks += blocks1;
1507         }
1508         return rv;
1509 }
1510 EXPORT_SYMBOL(bitmap_start_sync);
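
/*
 * Worked example (assuming 4KiB pages): PAGE_SIZE>>9 is 8, so the loop above
 * keeps calling __bitmap_start_sync() until at least 8 sectors have been
 * accounted for.  With the usual multi-megabyte chunks a single call covers
 * far more than that, so the loop normally needs only one iteration unless
 * the offset is within a few sectors of a chunk boundary.
 */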
1511
1512 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
1513 {
1514         bitmap_counter_t *bmc;
1515         unsigned long flags;
1516
1517         if (bitmap == NULL) {
1518                 *blocks = 1024;
1519                 return;
1520         }
1521         spin_lock_irqsave(&bitmap->counts.lock, flags);
1522         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1523         if (bmc == NULL)
1524                 goto unlock;
1525         /* locked */
1526         if (RESYNC(*bmc)) {
1527                 *bmc &= ~RESYNC_MASK;
1528
1529                 if (!NEEDED(*bmc) && aborted)
1530                         *bmc |= NEEDED_MASK;
1531                 else {
1532                         if (*bmc <= 2) {
1533                                 bitmap_set_pending(&bitmap->counts, offset);
1534                                 bitmap->allclean = 0;
1535                         }
1536                 }
1537         }
1538  unlock:
1539         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1540 }
1541 EXPORT_SYMBOL(bitmap_end_sync);
1542
1543 void bitmap_close_sync(struct bitmap *bitmap)
1544 {
1545         /* Sync has finished, and any bitmap chunks that weren't synced
1546          * properly have been aborted.  It remains to us to clear the
1547          * RESYNC bit wherever it is still on
1548          */
1549         sector_t sector = 0;
1550         sector_t blocks;
1551         if (!bitmap)
1552                 return;
1553         while (sector < bitmap->mddev->resync_max_sectors) {
1554                 bitmap_end_sync(bitmap, sector, &blocks, 0);
1555                 sector += blocks;
1556         }
1557 }
1558 EXPORT_SYMBOL(bitmap_close_sync);
1559
1560 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1561 {
1562         sector_t s = 0;
1563         sector_t blocks;
1564
1565         if (!bitmap)
1566                 return;
1567         if (sector == 0) {
1568                 bitmap->last_end_sync = jiffies;
1569                 return;
1570         }
1571         if (time_before(jiffies, (bitmap->last_end_sync
1572                                   + bitmap->mddev->bitmap_info.daemon_sleep)))
1573                 return;
1574         wait_event(bitmap->mddev->recovery_wait,
1575                    atomic_read(&bitmap->mddev->recovery_active) == 0);
1576
1577         bitmap->mddev->curr_resync_completed = sector;
1578         set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
1579         sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
1580         s = 0;
1581         while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1582                 bitmap_end_sync(bitmap, s, &blocks, 0);
1583                 s += blocks;
1584         }
1585         bitmap->last_end_sync = jiffies;
1586         sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
1587 }
1588 EXPORT_SYMBOL(bitmap_cond_end_sync);
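
/*
 * bitmap_cond_end_sync() is called from the personalities' resync loops.  It
 * rate-limits itself to one pass per daemon_sleep interval and only clears
 * RESYNC bits for chunks that lie wholly before 'sector', after waiting for
 * any in-flight recovery I/O to drain.
 */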
1589
1590 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1591 {
1592         /* For each chunk covered by any of these sectors, set the
1593          * counter to 2 and possibly set resync_needed.  They should all
1594          * be 0 at this point
1595          */
1596
1597         sector_t secs;
1598         bitmap_counter_t *bmc;
1599         spin_lock_irq(&bitmap->counts.lock);
1600         bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1601         if (!bmc) {
1602                 spin_unlock_irq(&bitmap->counts.lock);
1603                 return;
1604         }
1605         if (!*bmc) {
1606                 *bmc = 2;
1607                 bitmap_count_page(&bitmap->counts, offset, 1);
1608                 bitmap_set_pending(&bitmap->counts, offset);
1609                 bitmap->allclean = 0;
1610         }
1611         if (needed)
1612                 *bmc |= NEEDED_MASK;
1613         spin_unlock_irq(&bitmap->counts.lock);
1614 }
1615
1616 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
1617 void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1618 {
1619         unsigned long chunk;
1620
1621         for (chunk = s; chunk <= e; chunk++) {
1622                 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1623                 bitmap_set_memory_bits(bitmap, sec, 1);
1624                 bitmap_file_set_bit(bitmap, sec);
1625                 if (sec < bitmap->mddev->recovery_cp)
1626                         /* We are asserting that the array is dirty,
1627                          * so move the recovery_cp address back so
1628                          * that it is obvious that it is dirty
1629                          */
1630                         bitmap->mddev->recovery_cp = sec;
1631         }
1632 }
1633
1634 /*
1635  * flush out any pending updates
1636  */
1637 void bitmap_flush(struct mddev *mddev)
1638 {
1639         struct bitmap *bitmap = mddev->bitmap;
1640         long sleep;
1641
1642         if (!bitmap) /* there was no bitmap */
1643                 return;
1644
1645         /* run the daemon_work three times to ensure everything that can
1646          * be flushed is flushed
1647          */
1648         sleep = mddev->bitmap_info.daemon_sleep * 2;
1649         bitmap->daemon_lastrun -= sleep;
1650         bitmap_daemon_work(mddev);
1651         bitmap->daemon_lastrun -= sleep;
1652         bitmap_daemon_work(mddev);
1653         bitmap->daemon_lastrun -= sleep;
1654         bitmap_daemon_work(mddev);
1655         bitmap_update_sb(bitmap);
1656 }
1657
1658 /*
1659  * free memory that was allocated
1660  */
1661 static void bitmap_free(struct bitmap *bitmap)
1662 {
1663         unsigned long k, pages;
1664         struct bitmap_page *bp;
1665
1666         if (!bitmap) /* there was no bitmap */
1667                 return;
1668
1669         if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
1670                 bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
1671                 md_cluster_stop(bitmap->mddev);
1672
1673         /* Shouldn't be needed - but just in case.... */
1674         wait_event(bitmap->write_wait,
1675                    atomic_read(&bitmap->pending_writes) == 0);
1676
1677         /* release the bitmap file  */
1678         bitmap_file_unmap(&bitmap->storage);
1679
1680         bp = bitmap->counts.bp;
1681         pages = bitmap->counts.pages;
1682
1683         /* free all allocated memory */
1684
1685         if (bp) /* deallocate the page memory */
1686                 for (k = 0; k < pages; k++)
1687                         if (bp[k].map && !bp[k].hijacked)
1688                                 kfree(bp[k].map);
1689         kfree(bp);
1690         kfree(bitmap);
1691 }
1692
1693 void bitmap_destroy(struct mddev *mddev)
1694 {
1695         struct bitmap *bitmap = mddev->bitmap;
1696
1697         if (!bitmap) /* there was no bitmap */
1698                 return;
1699
1700         mutex_lock(&mddev->bitmap_info.mutex);
1701         spin_lock(&mddev->lock);
1702         mddev->bitmap = NULL; /* disconnect from the md device */
1703         spin_unlock(&mddev->lock);
1704         mutex_unlock(&mddev->bitmap_info.mutex);
1705         if (mddev->thread)
1706                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1707
1708         if (bitmap->sysfs_can_clear)
1709                 sysfs_put(bitmap->sysfs_can_clear);
1710
1711         bitmap_free(bitmap);
1712 }
1713
1714 /*
1715  * initialize the bitmap structure
1716  * if this returns an error, bitmap_destroy must be called to do clean up
1717  */
1718 struct bitmap *bitmap_create(struct mddev *mddev, int slot)
1719 {
1720         struct bitmap *bitmap;
1721         sector_t blocks = mddev->resync_max_sectors;
1722         struct file *file = mddev->bitmap_info.file;
1723         int err;
1724         struct kernfs_node *bm = NULL;
1725
1726         BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1727
1728         BUG_ON(file && mddev->bitmap_info.offset);
1729
1730         bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1731         if (!bitmap)
1732                 return ERR_PTR(-ENOMEM);
1733
1734         spin_lock_init(&bitmap->counts.lock);
1735         atomic_set(&bitmap->pending_writes, 0);
1736         init_waitqueue_head(&bitmap->write_wait);
1737         init_waitqueue_head(&bitmap->overflow_wait);
1738         init_waitqueue_head(&bitmap->behind_wait);
1739
1740         bitmap->mddev = mddev;
1741         bitmap->cluster_slot = slot;
1742
1743         if (mddev->kobj.sd)
1744                 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
1745         if (bm) {
1746                 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
1747                 sysfs_put(bm);
1748         } else
1749                 bitmap->sysfs_can_clear = NULL;
1750
1751         bitmap->storage.file = file;
1752         if (file) {
1753                 get_file(file);
1754                 /* As future accesses to this file will use bmap,
1755                  * and bypass the page cache, we must sync the file
1756                  * first.
1757                  */
1758                 vfs_fsync(file, 1);
1759         }
1760         /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1761         if (!mddev->bitmap_info.external) {
1762                 /*
1763                  * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
1764                  * instructing us to create a new on-disk bitmap instance.
1765                  */
1766                 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
1767                         err = bitmap_new_disk_sb(bitmap);
1768                 else
1769                         err = bitmap_read_sb(bitmap);
1770         } else {
1771                 err = 0;
1772                 if (mddev->bitmap_info.chunksize == 0 ||
1773                     mddev->bitmap_info.daemon_sleep == 0)
1774                         /* chunksize and time_base need to be
1775                          * set first. */
1776                         err = -EINVAL;
1777         }
1778         if (err)
1779                 goto error;
1780
1781         bitmap->daemon_lastrun = jiffies;
1782         err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
1783         if (err)
1784                 goto error;
1785
1786         printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1787                bitmap->counts.pages, bmname(bitmap));
1788
1789         err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
1790         if (err)
1791                 goto error;
1792
1793         return bitmap;
1794  error:
1795         bitmap_free(bitmap);
1796         return ERR_PTR(err);
1797 }
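
/*
 * A minimal usage sketch (this mirrors what location_store() below does;
 * error handling abbreviated):
 *
 *	struct bitmap *bitmap = bitmap_create(mddev, -1);
 *	if (IS_ERR(bitmap))
 *		return PTR_ERR(bitmap);
 *	mddev->bitmap = bitmap;
 *	err = bitmap_load(mddev);
 *	if (err)
 *		bitmap_destroy(mddev);
 */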
1798
1799 int bitmap_load(struct mddev *mddev)
1800 {
1801         int err = 0;
1802         sector_t start = 0;
1803         sector_t sector = 0;
1804         struct bitmap *bitmap = mddev->bitmap;
1805
1806         if (!bitmap)
1807                 goto out;
1808
1809         /* Clear out old bitmap info first:  Either there is none, or we
1810          * are resuming after someone else has possibly changed things,
1811          * so we should forget old cached info.
1812          * All chunks should be clean, but some might need_sync.
1813          */
1814         while (sector < mddev->resync_max_sectors) {
1815                 sector_t blocks;
1816                 bitmap_start_sync(bitmap, sector, &blocks, 0);
1817                 sector += blocks;
1818         }
1819         bitmap_close_sync(bitmap);
1820
1821         if (mddev->degraded == 0
1822             || bitmap->events_cleared == mddev->events)
1823                 /* no need to keep dirty bits to optimise a
1824                  * re-add of a missing device */
1825                 start = mddev->recovery_cp;
1826
1827         mutex_lock(&mddev->bitmap_info.mutex);
1828         err = bitmap_init_from_disk(bitmap, start);
1829         mutex_unlock(&mddev->bitmap_info.mutex);
1830
1831         if (err)
1832                 goto out;
1833         clear_bit(BITMAP_STALE, &bitmap->flags);
1834
1835         /* Kick recovery in case any bits were set */
1836         set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1837
1838         mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1839         md_wakeup_thread(mddev->thread);
1840
1841         bitmap_update_sb(bitmap);
1842
1843         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1844                 err = -EIO;
1845 out:
1846         return err;
1847 }
1848 EXPORT_SYMBOL_GPL(bitmap_load);
1849
1850 /* Loads the bitmap associated with slot and copies the resync information
1851  * to our bitmap
1852  */
1853 int bitmap_copy_from_slot(struct mddev *mddev, int slot,
1854                 sector_t *low, sector_t *high, bool clear_bits)
1855 {
1856         int rv = 0, i, j;
1857         sector_t block, lo = 0, hi = 0;
1858         struct bitmap_counts *counts;
1859         struct bitmap *bitmap = bitmap_create(mddev, slot);
1860
1861         if (IS_ERR(bitmap))
1862                 return PTR_ERR(bitmap);
1863
1864         rv = bitmap_read_sb(bitmap);
1865         if (rv)
1866                 goto err;
1867
1868         rv = bitmap_init_from_disk(bitmap, 0);
1869         if (rv)
1870                 goto err;
1871
1872         counts = &bitmap->counts;
1873         for (j = 0; j < counts->chunks; j++) {
1874                 block = (sector_t)j << counts->chunkshift;
1875                 if (bitmap_file_test_bit(bitmap, block)) {
1876                         if (!lo)
1877                                 lo = block;
1878                         hi = block;
1879                         bitmap_file_clear_bit(bitmap, block);
1880                         bitmap_set_memory_bits(mddev->bitmap, block, 1);
1881                         bitmap_file_set_bit(mddev->bitmap, block);
1882                 }
1883         }
1884
1885         if (clear_bits) {
1886                 bitmap_update_sb(bitmap);
1887                 /* Setting this for the ev_page should be enough.
1888                  * And we do not require both write_all and BITMAP_PAGE_DIRTY either.
1889                  */
1890                 for (i = 0; i < bitmap->storage.file_pages; i++)
1891                         set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1892                 bitmap_write_all(bitmap);
1893                 bitmap_unplug(bitmap);
1894         }
1895         *low = lo;
1896         *high = hi;
1897 err:
1898         bitmap_free(bitmap);
1899         return rv;
1900 }
1901 EXPORT_SYMBOL_GPL(bitmap_copy_from_slot);
1902
1903
1904 void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
1905 {
1906         unsigned long chunk_kb;
1907         struct bitmap_counts *counts;
1908
1909         if (!bitmap)
1910                 return;
1911
1912         counts = &bitmap->counts;
1913
1914         chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
1915         seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
1916                    "%lu%s chunk",
1917                    counts->pages - counts->missing_pages,
1918                    counts->pages,
1919                    (counts->pages - counts->missing_pages)
1920                    << (PAGE_SHIFT - 10),
1921                    chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
1922                    chunk_kb ? "KB" : "B");
1923         if (bitmap->storage.file) {
1924                 seq_printf(seq, ", file: ");
1925                 seq_path(seq, &bitmap->storage.file->f_path, " \t\n");
1926         }
1927
1928         seq_printf(seq, "\n");
1929 }
1930
1931 int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
1932                   int chunksize, int init)
1933 {
1934         /* If chunk_size is 0, choose an appropriate chunk size.
1935          * Then possibly allocate new storage space.
1936          * Then quiesce, copy bits, replace bitmap, and re-start
1937          *
1938          * This function is called both to set up the initial bitmap
1939          * and to resize the bitmap while the array is active.
1940          * If this happens as a result of the array being resized,
1941          * chunksize will be zero, and we need to choose a suitable
1942          * chunksize, otherwise we use what we are given.
1943          */
1944         struct bitmap_storage store;
1945         struct bitmap_counts old_counts;
1946         unsigned long chunks;
1947         sector_t block;
1948         sector_t old_blocks, new_blocks;
1949         int chunkshift;
1950         int ret = 0;
1951         long pages;
1952         struct bitmap_page *new_bp;
1953
1954         if (chunksize == 0) {
1955                 /* If there is enough space, leave the chunk size unchanged,
1956                  * else increase by factor of two until there is enough space.
1957                  */
1958                 long bytes;
1959                 long space = bitmap->mddev->bitmap_info.space;
1960
1961                 if (space == 0) {
1962                         /* We don't know how much space there is, so limit
1963                          * to current size - in sectors.
1964                          */
1965                         bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
1966                         if (!bitmap->mddev->bitmap_info.external)
1967                                 bytes += sizeof(bitmap_super_t);
1968                         space = DIV_ROUND_UP(bytes, 512);
1969                         bitmap->mddev->bitmap_info.space = space;
1970                 }
1971                 chunkshift = bitmap->counts.chunkshift;
1972                 chunkshift--;
1973                 do {
1974                         /* 'chunkshift' is shift from block size to chunk size */
1975                         chunkshift++;
1976                         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1977                         bytes = DIV_ROUND_UP(chunks, 8);
1978                         if (!bitmap->mddev->bitmap_info.external)
1979                                 bytes += sizeof(bitmap_super_t);
1980                 } while (bytes > (space << 9));
1981         } else
1982                 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
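        /*
         * Example (sketch): with a 64MiB chunk (chunksize == 1 << 26) and
         * BITMAP_BLOCK_SHIFT == 9, chunkshift is 26 - 9 = 17, i.e. each
         * bitmap chunk covers 2^17 512-byte sectors.
         */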
1983
1984         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1985         memset(&store, 0, sizeof(store));
1986         if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
1987                 ret = bitmap_storage_alloc(&store, chunks,
1988                                            !bitmap->mddev->bitmap_info.external,
1989                                            bitmap->cluster_slot);
1990         if (ret)
1991                 goto err;
1992
1993         pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
1994
1995         new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL);
1996         ret = -ENOMEM;
1997         if (!new_bp) {
1998                 bitmap_file_unmap(&store);
1999                 goto err;
2000         }
2001
2002         if (!init)
2003                 bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
2004
2005         store.file = bitmap->storage.file;
2006         bitmap->storage.file = NULL;
2007
2008         if (store.sb_page && bitmap->storage.sb_page)
2009                 memcpy(page_address(store.sb_page),
2010                        page_address(bitmap->storage.sb_page),
2011                        sizeof(bitmap_super_t));
2012         bitmap_file_unmap(&bitmap->storage);
2013         bitmap->storage = store;
2014
2015         old_counts = bitmap->counts;
2016         bitmap->counts.bp = new_bp;
2017         bitmap->counts.pages = pages;
2018         bitmap->counts.missing_pages = pages;
2019         bitmap->counts.chunkshift = chunkshift;
2020         bitmap->counts.chunks = chunks;
2021         bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
2022                                                      BITMAP_BLOCK_SHIFT);
2023
2024         blocks = min(old_counts.chunks << old_counts.chunkshift,
2025                      chunks << chunkshift);
2026
2027         spin_lock_irq(&bitmap->counts.lock);
2028         for (block = 0; block < blocks; ) {
2029                 bitmap_counter_t *bmc_old, *bmc_new;
2030                 int set;
2031
2032                 bmc_old = bitmap_get_counter(&old_counts, block,
2033                                              &old_blocks, 0);
2034                 set = bmc_old && NEEDED(*bmc_old);
2035
2036                 if (set) {
2037                         bmc_new = bitmap_get_counter(&bitmap->counts, block,
2038                                                      &new_blocks, 1);
2039                         if (*bmc_new == 0) {
2040                                 /* need to set on-disk bits too. */
2041                                 sector_t end = block + new_blocks;
2042                                 sector_t start = block >> chunkshift;
2043                                 start <<= chunkshift;
2044                                 while (start < end) {
2045                                         bitmap_file_set_bit(bitmap, block);
2046                                         start += 1 << chunkshift;
2047                                 }
2048                                 *bmc_new = 2;
2049                                 bitmap_count_page(&bitmap->counts,
2050                                                   block, 1);
2051                                 bitmap_set_pending(&bitmap->counts,
2052                                                    block);
2053                         }
2054                         *bmc_new |= NEEDED_MASK;
2055                         if (new_blocks < old_blocks)
2056                                 old_blocks = new_blocks;
2057                 }
2058                 block += old_blocks;
2059         }
2060
2061         if (!init) {
2062                 int i;
2063                 while (block < (chunks << chunkshift)) {
2064                         bitmap_counter_t *bmc;
2065                         bmc = bitmap_get_counter(&bitmap->counts, block,
2066                                                  &new_blocks, 1);
2067                         if (bmc) {
2068                                 /* new space.  It needs to be resynced, so
2069                                  * we set NEEDED_MASK.
2070                                  */
2071                                 if (*bmc == 0) {
2072                                         *bmc = NEEDED_MASK | 2;
2073                                         bitmap_count_page(&bitmap->counts,
2074                                                           block, 1);
2075                                         bitmap_set_pending(&bitmap->counts,
2076                                                            block);
2077                                 }
2078                         }
2079                         block += new_blocks;
2080                 }
2081                 for (i = 0; i < bitmap->storage.file_pages; i++)
2082                         set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
2083         }
2084         spin_unlock_irq(&bitmap->counts.lock);
2085
2086         if (!init) {
2087                 bitmap_unplug(bitmap);
2088                 bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
2089         }
2090         ret = 0;
2091 err:
2092         return ret;
2093 }
2094 EXPORT_SYMBOL_GPL(bitmap_resize);
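
/*
 * bitmap_resize() is used in two ways: bitmap_create() above calls it with
 * init=1 to build the initial counter and storage arrays, and (hence the
 * export) a personality can call it with init=0 when the array is grown, in
 * which case the newly covered space is marked as needing resync.
 */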
2095
2096 static ssize_t
2097 location_show(struct mddev *mddev, char *page)
2098 {
2099         ssize_t len;
2100         if (mddev->bitmap_info.file)
2101                 len = sprintf(page, "file");
2102         else if (mddev->bitmap_info.offset)
2103                 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
2104         else
2105                 len = sprintf(page, "none");
2106         len += sprintf(page+len, "\n");
2107         return len;
2108 }
2109
2110 static ssize_t
2111 location_store(struct mddev *mddev, const char *buf, size_t len)
2112 {
2113
2114         if (mddev->pers) {
2115                 if (!mddev->pers->quiesce)
2116                         return -EBUSY;
2117                 if (mddev->recovery || mddev->sync_thread)
2118                         return -EBUSY;
2119         }
2120
2121         if (mddev->bitmap || mddev->bitmap_info.file ||
2122             mddev->bitmap_info.offset) {
2123                 /* bitmap already configured.  Only option is to clear it */
2124                 if (strncmp(buf, "none", 4) != 0)
2125                         return -EBUSY;
2126                 if (mddev->pers) {
2127                         mddev->pers->quiesce(mddev, 1);
2128                         bitmap_destroy(mddev);
2129                         mddev->pers->quiesce(mddev, 0);
2130                 }
2131                 mddev->bitmap_info.offset = 0;
2132                 if (mddev->bitmap_info.file) {
2133                         struct file *f = mddev->bitmap_info.file;
2134                         mddev->bitmap_info.file = NULL;
2135                         fput(f);
2136                 }
2137         } else {
2138                 /* No bitmap, OK to set a location */
2139                 long long offset;
2140                 if (strncmp(buf, "none", 4) == 0)
2141                         /* nothing to be done */;
2142                 else if (strncmp(buf, "file:", 5) == 0) {
2143                         /* Not supported yet */
2144                         return -EINVAL;
2145                 } else {
2146                         int rv;
2147                         if (buf[0] == '+')
2148                                 rv = kstrtoll(buf+1, 10, &offset);
2149                         else
2150                                 rv = kstrtoll(buf, 10, &offset);
2151                         if (rv)
2152                                 return rv;
2153                         if (offset == 0)
2154                                 return -EINVAL;
2155                         if (mddev->bitmap_info.external == 0 &&
2156                             mddev->major_version == 0 &&
2157                             offset != mddev->bitmap_info.default_offset)
2158                                 return -EINVAL;
2159                         mddev->bitmap_info.offset = offset;
2160                         if (mddev->pers) {
2161                                 struct bitmap *bitmap;
2162                                 mddev->pers->quiesce(mddev, 1);
2163                                 bitmap = bitmap_create(mddev, -1);
2164                                 if (IS_ERR(bitmap))
2165                                         rv = PTR_ERR(bitmap);
2166                                 else {
2167                                         mddev->bitmap = bitmap;
2168                                         rv = bitmap_load(mddev);
2169                                         if (rv) {
2170                                                 bitmap_destroy(mddev);
2171                                                 mddev->bitmap_info.offset = 0;
2172                                         }
2173                                 }
2174                                 mddev->pers->quiesce(mddev, 0);
2175                                 if (rv)
2176                                         return rv;
2177                         }
2178                 }
2179         }
2180         if (!mddev->external) {
2181                 /* Ensure new bitmap info is stored in
2182                  * metadata promptly.
2183                  */
2184                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2185                 md_wakeup_thread(mddev->thread);
2186         }
2187         return len;
2188 }
2189
2190 static struct md_sysfs_entry bitmap_location =
2191 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
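
/*
 * Example (sketch; /dev/md0 and the offset are illustrative, and with v0.90
 * internal metadata only the default offset is accepted):
 *
 *	# place an internal bitmap 8 sectors from the superblock
 *	echo +8 > /sys/block/md0/md/bitmap/location
 *	# remove it again
 *	echo none > /sys/block/md0/md/bitmap/location
 */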
2192
2193 /* 'bitmap/space' is the space available at 'location' for the
2194  * bitmap.  This allows the kernel to know when it is safe to
2195  * resize the bitmap to match a resized array.
2196  */
2197 static ssize_t
2198 space_show(struct mddev *mddev, char *page)
2199 {
2200         return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2201 }
2202
2203 static ssize_t
2204 space_store(struct mddev *mddev, const char *buf, size_t len)
2205 {
2206         unsigned long sectors;
2207         int rv;
2208
2209         rv = kstrtoul(buf, 10, &sectors);
2210         if (rv)
2211                 return rv;
2212
2213         if (sectors == 0)
2214                 return -EINVAL;
2215
2216         if (mddev->bitmap &&
2217             sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
2218                 return -EFBIG; /* Bitmap is too big for this small space */
2219
2220         /* could make sure it isn't too big, but that isn't really
2221          * needed - user-space should be careful.
2222          */
2223         mddev->bitmap_info.space = sectors;
2224         return len;
2225 }
2226
2227 static struct md_sysfs_entry bitmap_space =
2228 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2229
2230 static ssize_t
2231 timeout_show(struct mddev *mddev, char *page)
2232 {
2233         ssize_t len;
2234         unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2235         unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2236
2237         len = sprintf(page, "%lu", secs);
2238         if (jifs)
2239                 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2240         len += sprintf(page+len, "\n");
2241         return len;
2242 }
2243
2244 static ssize_t
2245 timeout_store(struct mddev *mddev, const char *buf, size_t len)
2246 {
2247         /* timeout can be set at any time */
2248         unsigned long timeout;
2249         int rv = strict_strtoul_scaled(buf, &timeout, 4);
2250         if (rv)
2251                 return rv;
2252
2253         /* just to make sure we don't overflow... */
2254         if (timeout >= LONG_MAX / HZ)
2255                 return -EINVAL;
2256
2257         timeout = timeout * HZ / 10000;
2258
2259         if (timeout >= MAX_SCHEDULE_TIMEOUT)
2260                 timeout = MAX_SCHEDULE_TIMEOUT-1;
2261         if (timeout < 1)
2262                 timeout = 1;
2263         mddev->bitmap_info.daemon_sleep = timeout;
2264         if (mddev->thread) {
2265                 /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
2266                  * the bitmap is all clean and we don't need to
2267                  * adjust the timeout right now
2268                  */
2269                 if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
2270                         mddev->thread->timeout = timeout;
2271                         md_wakeup_thread(mddev->thread);
2272                 }
2273         }
2274         return len;
2275 }
2276
2277 static struct md_sysfs_entry bitmap_timeout =
2278 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
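
/*
 * Example (sketch; md0 is illustrative): delay bitmap bit clearing by five
 * seconds between daemon passes:
 *
 *	echo 5 > /sys/block/md0/md/bitmap/time_base
 *
 * Fractional values such as "0.5" are also accepted; timeout_store() above
 * converts the value to jiffies and clamps it to at least one jiffy.
 */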
2279
2280 static ssize_t
2281 backlog_show(struct mddev *mddev, char *page)
2282 {
2283         return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2284 }
2285
2286 static ssize_t
2287 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2288 {
2289         unsigned long backlog;
2290         int rv = kstrtoul(buf, 10, &backlog);
2291         if (rv)
2292                 return rv;
2293         if (backlog > COUNTER_MAX)
2294                 return -EINVAL;
2295         mddev->bitmap_info.max_write_behind = backlog;
2296         return len;
2297 }
2298
2299 static struct md_sysfs_entry bitmap_backlog =
2300 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2301
2302 static ssize_t
2303 chunksize_show(struct mddev *mddev, char *page)
2304 {
2305         return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2306 }
2307
2308 static ssize_t
2309 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2310 {
2311         /* Can only be changed when no bitmap is active */
2312         int rv;
2313         unsigned long csize;
2314         if (mddev->bitmap)
2315                 return -EBUSY;
2316         rv = kstrtoul(buf, 10, &csize);
2317         if (rv)
2318                 return rv;
2319         if (csize < 512 ||
2320             !is_power_of_2(csize))
2321                 return -EINVAL;
2322         mddev->bitmap_info.chunksize = csize;
2323         return len;
2324 }
2325
2326 static struct md_sysfs_entry bitmap_chunksize =
2327 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2328
2329 static ssize_t metadata_show(struct mddev *mddev, char *page)
2330 {
2331         if (mddev_is_clustered(mddev))
2332                 return sprintf(page, "clustered\n");
2333         return sprintf(page, "%s\n", (mddev->bitmap_info.external
2334                                       ? "external" : "internal"));
2335 }
2336
2337 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2338 {
2339         if (mddev->bitmap ||
2340             mddev->bitmap_info.file ||
2341             mddev->bitmap_info.offset)
2342                 return -EBUSY;
2343         if (strncmp(buf, "external", 8) == 0)
2344                 mddev->bitmap_info.external = 1;
2345         else if ((strncmp(buf, "internal", 8) == 0) ||
2346                         (strncmp(buf, "clustered", 9) == 0))
2347                 mddev->bitmap_info.external = 0;
2348         else
2349                 return -EINVAL;
2350         return len;
2351 }
2352
2353 static struct md_sysfs_entry bitmap_metadata =
2354 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2355
2356 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2357 {
2358         int len;
2359         spin_lock(&mddev->lock);
2360         if (mddev->bitmap)
2361                 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
2362                                              "false" : "true"));
2363         else
2364                 len = sprintf(page, "\n");
2365         spin_unlock(&mddev->lock);
2366         return len;
2367 }
2368
2369 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2370 {
2371         if (mddev->bitmap == NULL)
2372                 return -ENOENT;
2373         if (strncmp(buf, "false", 5) == 0)
2374                 mddev->bitmap->need_sync = 1;
2375         else if (strncmp(buf, "true", 4) == 0) {
2376                 if (mddev->degraded)
2377                         return -EBUSY;
2378                 mddev->bitmap->need_sync = 0;
2379         } else
2380                 return -EINVAL;
2381         return len;
2382 }
2383
2384 static struct md_sysfs_entry bitmap_can_clear =
2385 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2386
2387 static ssize_t
2388 behind_writes_used_show(struct mddev *mddev, char *page)
2389 {
2390         ssize_t ret;
2391         spin_lock(&mddev->lock);
2392         if (mddev->bitmap == NULL)
2393                 ret = sprintf(page, "0\n");
2394         else
2395                 ret = sprintf(page, "%lu\n",
2396                               mddev->bitmap->behind_writes_used);
2397         spin_unlock(&mddev->lock);
2398         return ret;
2399 }
2400
2401 static ssize_t
2402 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2403 {
2404         if (mddev->bitmap)
2405                 mddev->bitmap->behind_writes_used = 0;
2406         return len;
2407 }
2408
2409 static struct md_sysfs_entry max_backlog_used =
2410 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
2411        behind_writes_used_show, behind_writes_used_reset);
2412
2413 static struct attribute *md_bitmap_attrs[] = {
2414         &bitmap_location.attr,
2415         &bitmap_space.attr,
2416         &bitmap_timeout.attr,
2417         &bitmap_backlog.attr,
2418         &bitmap_chunksize.attr,
2419         &bitmap_metadata.attr,
2420         &bitmap_can_clear.attr,
2421         &max_backlog_used.attr,
2422         NULL
2423 };
2424 struct attribute_group md_bitmap_group = {
2425         .name = "bitmap",
2426         .attrs = md_bitmap_attrs,
2427 };
2428