1 /*
2  * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3  *
4  * bitmap_create  - sets up the bitmap structure
5  * bitmap_destroy - destroys the bitmap structure
6  *
7  * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8  * - added disk storage for bitmap
9  * - changes to allow various bitmap chunk sizes
10  */
11
12 /*
13  * Still to do:
14  *
15  * flush after percent set rather than just time based. (maybe both).
16  */
17
18 #include <linux/blkdev.h>
19 #include <linux/module.h>
20 #include <linux/errno.h>
21 #include <linux/slab.h>
22 #include <linux/init.h>
23 #include <linux/timer.h>
24 #include <linux/sched.h>
25 #include <linux/list.h>
26 #include <linux/file.h>
27 #include <linux/mount.h>
28 #include <linux/buffer_head.h>
29 #include <linux/seq_file.h>
30 #include "md.h"
31 #include "bitmap.h"
32
33 static inline char *bmname(struct bitmap *bitmap)
34 {
35         return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
36 }
37
38 /*
39  * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
40  *
41  * 1) check to see if this page is allocated, if it's not then try to alloc
42  * 2) if the alloc fails, set the page's hijacked flag so we'll use the
43  *    page pointer directly as a counter
44  *
45  * if we find our page, we increment the page's refcount so that it stays
46  * allocated while we're using it
47  */
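/*
 * Note on "hijacking" (a sketch of the trick; see bitmap_get_counter()):
 * bitmap_page.map normally points at a kzalloc'd PAGE_SIZE block of
 * counters.  If that GFP_NOIO allocation fails we must still make
 * progress, so the pointer field itself is reused as storage for just
 * two counters covering the whole range the page would have covered, at
 * much coarser granularity.  The 'hijacked' flag records that 'map' must
 * not be dereferenced as a pointer in that case.
 */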
48 static int bitmap_checkpage(struct bitmap_counts *bitmap,
49                             unsigned long page, int create)
50 __releases(bitmap->lock)
51 __acquires(bitmap->lock)
52 {
53         unsigned char *mappage;
54
55         if (page >= bitmap->pages) {
56                 /* This can happen if bitmap_start_sync goes beyond
57                  * End-of-device while looking for a whole page.
58                  * It is harmless.
59                  */
60                 return -EINVAL;
61         }
62
63         if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
64                 return 0;
65
66         if (bitmap->bp[page].map) /* page is already allocated, just return */
67                 return 0;
68
69         if (!create)
70                 return -ENOENT;
71
72         /* this page has not been allocated yet */
73
74         spin_unlock_irq(&bitmap->lock);
75         mappage = kzalloc(PAGE_SIZE, GFP_NOIO);
76         spin_lock_irq(&bitmap->lock);
77
78         if (mappage == NULL) {
79                 pr_debug("md/bitmap: map page allocation failed, hijacking\n");
80                 /* failed - set the hijacked flag so that we can use the
81                  * pointer as a counter */
82                 if (!bitmap->bp[page].map)
83                         bitmap->bp[page].hijacked = 1;
84         } else if (bitmap->bp[page].map ||
85                    bitmap->bp[page].hijacked) {
86                 /* somebody beat us to getting the page */
87                 kfree(mappage);
88                 return 0;
89         } else {
90
91                 /* no page was in place and we have one, so install it */
92
93                 bitmap->bp[page].map = mappage;
94                 bitmap->missing_pages--;
95         }
96         return 0;
97 }
98
99 /* if page is completely empty, put it back on the free list, or dealloc it */
100 /* if page was hijacked, unmark the flag so it might get alloced next time */
101 /* Note: lock should be held when calling this */
102 static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page)
103 {
104         char *ptr;
105
106         if (bitmap->bp[page].count) /* page is still busy */
107                 return;
108
109         /* page is no longer in use, it can be released */
110
111         if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
112                 bitmap->bp[page].hijacked = 0;
113                 bitmap->bp[page].map = NULL;
114         } else {
115                 /* normal case, free the page */
116                 ptr = bitmap->bp[page].map;
117                 bitmap->bp[page].map = NULL;
118                 bitmap->missing_pages++;
119                 kfree(ptr);
120         }
121 }
122
123 /*
124  * bitmap file handling - read and write the bitmap file and its superblock
125  */
126
127 /*
128  * basic page I/O operations
129  */
130
131 /* IO operations when bitmap is stored near all superblocks */
132 static int read_sb_page(struct mddev *mddev, loff_t offset,
133                         struct page *page,
134                         unsigned long index, int size)
135 {
136         /* choose a good rdev and read the page from there */
137
138         struct md_rdev *rdev;
139         sector_t target;
140
141         rdev_for_each(rdev, mddev) {
142                 if (!test_bit(In_sync, &rdev->flags)
143                     || test_bit(Faulty, &rdev->flags))
144                         continue;
145
146                 target = offset + index * (PAGE_SIZE/512);
147
148                 if (sync_page_io(rdev, target,
149                                  roundup(size, bdev_logical_block_size(rdev->bdev)),
150                                  page, READ, true)) {
151                         page->index = index;
152                         return 0;
153                 }
154         }
155         return -EIO;
156 }
157
158 static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev)
159 {
160         /* Iterate the disks of an mddev, using rcu to protect access to the
161          * linked list, and raising the refcount of devices we return to ensure
162          * they don't disappear while in use.
163          * As devices are only added or removed when raid_disk is < 0 and
164          * nr_pending is 0 and In_sync is clear, the entries we return will
165          * still be in the same position on the list when we re-enter
166          * list_for_each_continue_rcu.
167          */
168         struct list_head *pos;
169         rcu_read_lock();
170         if (rdev == NULL)
171                 /* start at the beginning */
172                 pos = &mddev->disks;
173         else {
174                 /* release the previous rdev and start from there. */
175                 rdev_dec_pending(rdev, mddev);
176                 pos = &rdev->same_set;
177         }
178         list_for_each_continue_rcu(pos, &mddev->disks) {
179                 rdev = list_entry(pos, struct md_rdev, same_set);
180                 if (rdev->raid_disk >= 0 &&
181                     !test_bit(Faulty, &rdev->flags)) {
182                         /* this is a usable device */
183                         atomic_inc(&rdev->nr_pending);
184                         rcu_read_unlock();
185                         return rdev;
186                 }
187         }
188         rcu_read_unlock();
189         return NULL;
190 }
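/*
 * Note: the nr_pending reference taken above is only dropped on the
 * *next* call, so callers must keep iterating until next_active_rdev()
 * returns NULL (as write_sb_page() below does) or drop the last
 * reference themselves.
 */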
191
192 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
193 {
194         struct md_rdev *rdev = NULL;
195         struct block_device *bdev;
196         struct mddev *mddev = bitmap->mddev;
197         struct bitmap_storage *store = &bitmap->storage;
198
199         while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
200                 int size = PAGE_SIZE;
201                 loff_t offset = mddev->bitmap_info.offset;
202
203                 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
204
205                 if (page->index == store->file_pages-1) {
206                         int last_page_size = store->bytes & (PAGE_SIZE-1);
207                         if (last_page_size == 0)
208                                 last_page_size = PAGE_SIZE;
209                         size = roundup(last_page_size,
210                                        bdev_logical_block_size(bdev));
211                 }
212                 /* Just make sure we aren't corrupting data or
213                  * metadata
214                  */
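                /*
                 * All arithmetic below is in 512-byte sectors: 'offset'
                 * is the bitmap start relative to rdev->sb_start, each
                 * bitmap page occupies PAGE_SIZE/512 sectors, and
                 * size/512 is the length of this write.  The cases below
                 * check that the sectors being written never overlap
                 * [data_offset, data_offset + dev_sectors) for the
                 * possible on-disk layouts.
                 */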
215                 if (mddev->external) {
216                         /* Bitmap could be anywhere. */
217                         if (rdev->sb_start + offset + (page->index
218                                                        * (PAGE_SIZE/512))
219                             > rdev->data_offset
220                             &&
221                             rdev->sb_start + offset
222                             < (rdev->data_offset + mddev->dev_sectors
223                              + (PAGE_SIZE/512)))
224                                 goto bad_alignment;
225                 } else if (offset < 0) {
226                         /* DATA  BITMAP METADATA  */
227                         if (offset
228                             + (long)(page->index * (PAGE_SIZE/512))
229                             + size/512 > 0)
230                                 /* bitmap runs into metadata */
231                                 goto bad_alignment;
232                         if (rdev->data_offset + mddev->dev_sectors
233                             > rdev->sb_start + offset)
234                                 /* data runs into bitmap */
235                                 goto bad_alignment;
236                 } else if (rdev->sb_start < rdev->data_offset) {
237                         /* METADATA BITMAP DATA */
238                         if (rdev->sb_start
239                             + offset
240                             + page->index*(PAGE_SIZE/512) + size/512
241                             > rdev->data_offset)
242                                 /* bitmap runs into data */
243                                 goto bad_alignment;
244                 } else {
245                         /* DATA METADATA BITMAP - no problems */
246                 }
247                 md_super_write(mddev, rdev,
248                                rdev->sb_start + offset
249                                + page->index * (PAGE_SIZE/512),
250                                size,
251                                page);
252         }
253
254         if (wait)
255                 md_super_wait(mddev);
256         return 0;
257
258  bad_alignment:
259         return -EINVAL;
260 }
261
262 static void bitmap_file_kick(struct bitmap *bitmap);
263 /*
264  * write out a page to a file
265  */
266 static void write_page(struct bitmap *bitmap, struct page *page, int wait)
267 {
268         struct buffer_head *bh;
269
270         if (bitmap->storage.file == NULL) {
271                 switch (write_sb_page(bitmap, page, wait)) {
272                 case -EINVAL:
273                         set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
274                 }
275         } else {
276
277                 bh = page_buffers(page);
278
279                 while (bh && bh->b_blocknr) {
280                         atomic_inc(&bitmap->pending_writes);
281                         set_buffer_locked(bh);
282                         set_buffer_mapped(bh);
283                         submit_bh(WRITE | REQ_SYNC, bh);
284                         bh = bh->b_this_page;
285                 }
286
287                 if (wait)
288                         wait_event(bitmap->write_wait,
289                                    atomic_read(&bitmap->pending_writes)==0);
290         }
291         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
292                 bitmap_file_kick(bitmap);
293 }
294
295 static void end_bitmap_write(struct buffer_head *bh, int uptodate)
296 {
297         struct bitmap *bitmap = bh->b_private;
298
299         if (!uptodate)
300                 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
301         if (atomic_dec_and_test(&bitmap->pending_writes))
302                 wake_up(&bitmap->write_wait);
303 }
304
305 /* copied from buffer.c */
306 static void
307 __clear_page_buffers(struct page *page)
308 {
309         ClearPagePrivate(page);
310         set_page_private(page, 0);
311         page_cache_release(page);
312 }
313 static void free_buffers(struct page *page)
314 {
315         struct buffer_head *bh;
316
317         if (!PagePrivate(page))
318                 return;
319
320         bh = page_buffers(page);
321         while (bh) {
322                 struct buffer_head *next = bh->b_this_page;
323                 free_buffer_head(bh);
324                 bh = next;
325         }
326         __clear_page_buffers(page);
327         put_page(page);
328 }
329
330 /* read a page from a file.
331  * We both read the page, and attach buffers to the page to record the
332  * address of each block (using bmap).  These addresses will be used
333  * to write the block later, completely bypassing the filesystem.
334  * This usage is similar to how swap files are handled, and allows us
335  * to write to a file with no concerns of memory allocation failing.
336  */
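/* A b_blocknr of 0 from bmap() means the filesystem has no block mapped
 * at that file offset (a hole), so the bitmap cannot be driven this way;
 * read_page() rejects such files with -EINVAL rather than risk writing
 * through an unmapped block later.
 */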
337 static int read_page(struct file *file, unsigned long index,
338                      struct bitmap *bitmap,
339                      unsigned long count,
340                      struct page *page)
341 {
342         int ret = 0;
343         struct inode *inode = file->f_path.dentry->d_inode;
344         struct buffer_head *bh;
345         sector_t block;
346
347         pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE,
348                  (unsigned long long)index << PAGE_SHIFT);
349
350         bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0);
351         if (!bh) {
352                 ret = -ENOMEM;
353                 goto out;
354         }
355         attach_page_buffers(page, bh);
356         block = index << (PAGE_SHIFT - inode->i_blkbits);
357         while (bh) {
358                 if (count == 0)
359                         bh->b_blocknr = 0;
360                 else {
361                         bh->b_blocknr = bmap(inode, block);
362                         if (bh->b_blocknr == 0) {
363                                 /* Cannot use this file! */
364                                 ret = -EINVAL;
365                                 goto out;
366                         }
367                         bh->b_bdev = inode->i_sb->s_bdev;
368                         if (count < (1<<inode->i_blkbits))
369                                 count = 0;
370                         else
371                                 count -= (1<<inode->i_blkbits);
372
373                         bh->b_end_io = end_bitmap_write;
374                         bh->b_private = bitmap;
375                         atomic_inc(&bitmap->pending_writes);
376                         set_buffer_locked(bh);
377                         set_buffer_mapped(bh);
378                         submit_bh(READ, bh);
379                 }
380                 block++;
381                 bh = bh->b_this_page;
382         }
383         page->index = index;
384
385         wait_event(bitmap->write_wait,
386                    atomic_read(&bitmap->pending_writes)==0);
387         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
388                 ret = -EIO;
389 out:
390         if (ret)
391                 printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
392                         (int)PAGE_SIZE,
393                         (unsigned long long)index << PAGE_SHIFT,
394                         ret);
395         return ret;
396 }
397
398 /*
399  * bitmap file superblock operations
400  */
401
402 /* update the event counter and sync the superblock to disk */
403 void bitmap_update_sb(struct bitmap *bitmap)
404 {
405         bitmap_super_t *sb;
406
407         if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
408                 return;
409         if (bitmap->mddev->bitmap_info.external)
410                 return;
411         if (!bitmap->storage.sb_page) /* no superblock */
412                 return;
413         sb = kmap_atomic(bitmap->storage.sb_page);
414         sb->events = cpu_to_le64(bitmap->mddev->events);
415         if (bitmap->mddev->events < bitmap->events_cleared)
416                 /* rocking back to read-only */
417                 bitmap->events_cleared = bitmap->mddev->events;
418         sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
419         sb->state = cpu_to_le32(bitmap->flags);
420         /* Just in case these have been changed via sysfs: */
421         sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
422         sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
423         /* This might have been changed by a reshape */
424         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
425         sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
426         sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
427                                            bitmap_info.space);
428         kunmap_atomic(sb);
429         write_page(bitmap, bitmap->storage.sb_page, 1);
430 }
431
432 /* print out the bitmap file superblock */
433 void bitmap_print_sb(struct bitmap *bitmap)
434 {
435         bitmap_super_t *sb;
436
437         if (!bitmap || !bitmap->storage.sb_page)
438                 return;
439         sb = kmap_atomic(bitmap->storage.sb_page);
440         printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
441         printk(KERN_DEBUG "         magic: %08x\n", le32_to_cpu(sb->magic));
442         printk(KERN_DEBUG "       version: %d\n", le32_to_cpu(sb->version));
443         printk(KERN_DEBUG "          uuid: %08x.%08x.%08x.%08x\n",
444                                         *(__u32 *)(sb->uuid+0),
445                                         *(__u32 *)(sb->uuid+4),
446                                         *(__u32 *)(sb->uuid+8),
447                                         *(__u32 *)(sb->uuid+12));
448         printk(KERN_DEBUG "        events: %llu\n",
449                         (unsigned long long) le64_to_cpu(sb->events));
450         printk(KERN_DEBUG "events cleared: %llu\n",
451                         (unsigned long long) le64_to_cpu(sb->events_cleared));
452         printk(KERN_DEBUG "         state: %08x\n", le32_to_cpu(sb->state));
453         printk(KERN_DEBUG "     chunksize: %d B\n", le32_to_cpu(sb->chunksize));
454         printk(KERN_DEBUG "  daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
455         printk(KERN_DEBUG "     sync size: %llu KB\n",
456                         (unsigned long long)le64_to_cpu(sb->sync_size)/2);
457         printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
458         kunmap_atomic(sb);
459 }
460
461 /*
462  * bitmap_new_disk_sb
463  * @bitmap
464  *
465  * This function is somewhat the reverse of bitmap_read_sb.  bitmap_read_sb
466  * reads and verifies the on-disk bitmap superblock and populates bitmap_info.
467  * This function verifies 'bitmap_info' and populates the on-disk bitmap
468  * structure, which is to be written to disk.
469  *
470  * Returns: 0 on success, -Exxx on error
471  */
472 static int bitmap_new_disk_sb(struct bitmap *bitmap)
473 {
474         bitmap_super_t *sb;
475         unsigned long chunksize, daemon_sleep, write_behind;
476         int err = -EINVAL;
477
478         bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
479         if (!bitmap->storage.sb_page) {
480                 /* alloc_page() returns NULL on failure, never an ERR_PTR */
481                 err = -ENOMEM;
482                 return err;
483         }
484         bitmap->storage.sb_page->index = 0;
485
486         sb = kmap_atomic(bitmap->storage.sb_page);
487
488         sb->magic = cpu_to_le32(BITMAP_MAGIC);
489         sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
490
491         chunksize = bitmap->mddev->bitmap_info.chunksize;
492         BUG_ON(!chunksize);
493         if (!is_power_of_2(chunksize)) {
494                 kunmap_atomic(sb);
495                 printk(KERN_ERR "bitmap chunksize not a power of 2\n");
496                 return -EINVAL;
497         }
498         sb->chunksize = cpu_to_le32(chunksize);
499
500         daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
501         if (!daemon_sleep ||
502             (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
503                 printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
504                 daemon_sleep = 5 * HZ;
505         }
506         sb->daemon_sleep = cpu_to_le32(daemon_sleep);
507         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
508
509         /*
510          * FIXME: write_behind for RAID1.  If not specified, what
511          * is a good choice?  We choose COUNTER_MAX / 2 arbitrarily.
512          */
513         write_behind = bitmap->mddev->bitmap_info.max_write_behind;
514         if (write_behind > COUNTER_MAX)
515                 write_behind = COUNTER_MAX / 2;
516         sb->write_behind = cpu_to_le32(write_behind);
517         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
518
519         /* keep the array size field of the bitmap superblock up to date */
520         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
521
522         memcpy(sb->uuid, bitmap->mddev->uuid, 16);
523
524         set_bit(BITMAP_STALE, &bitmap->flags);
525         sb->state = cpu_to_le32(bitmap->flags);
526         bitmap->events_cleared = bitmap->mddev->events;
527         sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
528
529         kunmap_atomic(sb);
530
531         return 0;
532 }
533
534 /* read the superblock from the bitmap file and initialize some bitmap fields */
535 static int bitmap_read_sb(struct bitmap *bitmap)
536 {
537         char *reason = NULL;
538         bitmap_super_t *sb;
539         unsigned long chunksize, daemon_sleep, write_behind;
540         unsigned long long events;
541         unsigned long sectors_reserved = 0;
542         int err = -EINVAL;
543         struct page *sb_page;
544
545         if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
546                 chunksize = 128 * 1024 * 1024;
547                 daemon_sleep = 5 * HZ;
548                 write_behind = 0;
549                 set_bit(BITMAP_STALE, &bitmap->flags);
550                 err = 0;
551                 goto out_no_sb;
552         }
553         /* page 0 is the superblock, read it... */
554         sb_page = alloc_page(GFP_KERNEL);
555         if (!sb_page)
556                 return -ENOMEM;
557         bitmap->storage.sb_page = sb_page;
558
559         if (bitmap->storage.file) {
560                 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
561                 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
562
563                 err = read_page(bitmap->storage.file, 0,
564                                 bitmap, bytes, sb_page);
565         } else {
566                 err = read_sb_page(bitmap->mddev,
567                                    bitmap->mddev->bitmap_info.offset,
568                                    sb_page,
569                                    0, sizeof(bitmap_super_t));
570         }
571         if (err)
572                 return err;
573
574         sb = kmap_atomic(sb_page);
575
576         chunksize = le32_to_cpu(sb->chunksize);
577         daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
578         write_behind = le32_to_cpu(sb->write_behind);
579         sectors_reserved = le32_to_cpu(sb->sectors_reserved);
580
581         /* verify that the bitmap-specific fields are valid */
582         if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
583                 reason = "bad magic";
584         else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
585                  le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
586                 reason = "unrecognized superblock version";
587         else if (chunksize < 512)
588                 reason = "bitmap chunksize too small";
589         else if (!is_power_of_2(chunksize))
590                 reason = "bitmap chunksize not a power of 2";
591         else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT)
592                 reason = "daemon sleep period out of range";
593         else if (write_behind > COUNTER_MAX)
594                 reason = "write-behind limit out of range (0 - 16383)";
595         if (reason) {
596                 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
597                         bmname(bitmap), reason);
598                 goto out;
599         }
600
601         /* keep the array size field of the bitmap superblock up to date */
602         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
603
604         if (bitmap->mddev->persistent) {
605                 /*
606                  * We have a persistent array superblock, so compare the
607                  * bitmap's UUID and event counter to the mddev's
608                  */
609                 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
610                         printk(KERN_INFO
611                                "%s: bitmap superblock UUID mismatch\n",
612                                bmname(bitmap));
613                         goto out;
614                 }
615                 events = le64_to_cpu(sb->events);
616                 if (events < bitmap->mddev->events) {
617                         printk(KERN_INFO
618                                "%s: bitmap file is out of date (%llu < %llu) "
619                                "-- forcing full recovery\n",
620                                bmname(bitmap), events,
621                                (unsigned long long) bitmap->mddev->events);
622                         set_bit(BITMAP_STALE, &bitmap->flags);
623                 }
624         }
625
626         /* assign fields using values from superblock */
627         bitmap->flags |= le32_to_cpu(sb->state);
628         if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
629                 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
630         bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
631         err = 0;
632 out:
633         kunmap_atomic(sb);
634 out_no_sb:
635         if (test_bit(BITMAP_STALE, &bitmap->flags))
636                 bitmap->events_cleared = bitmap->mddev->events;
637         bitmap->mddev->bitmap_info.chunksize = chunksize;
638         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
639         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
640         if (bitmap->mddev->bitmap_info.space == 0 ||
641             bitmap->mddev->bitmap_info.space > sectors_reserved)
642                 bitmap->mddev->bitmap_info.space = sectors_reserved;
643         if (err)
644                 bitmap_print_sb(bitmap);
645         return err;
646 }
647
648 /*
649  * general bitmap file operations
650  */
651
652 /*
653  * on-disk bitmap:
654  *
655  * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap
656  * file a page at a time. There's a superblock at the start of the file.
657  */
658 /* calculate the index of the page that contains this bit */
659 static inline unsigned long file_page_index(struct bitmap_storage *store,
660                                             unsigned long chunk)
661 {
662         if (store->sb_page)
663                 chunk += sizeof(bitmap_super_t) << 3;
664         return chunk >> PAGE_BIT_SHIFT;
665 }
666
667 /* calculate the (bit) offset of this bit within a page */
668 static inline unsigned long file_page_offset(struct bitmap_storage *store,
669                                              unsigned long chunk)
670 {
671         if (store->sb_page)
672                 chunk += sizeof(bitmap_super_t) << 3;
673         return chunk & (PAGE_BITS - 1);
674 }
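/*
 * Worked example (illustrative, assuming 4K pages and the 256-byte
 * bitmap_super_t): with an in-file superblock the bit for chunk 0 lives
 * at bit offset 256*8 = 2048 of page 0, so page 0 covers chunks
 * 0..30719 and chunk 30720 is the first bit of page 1.
 */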
675
676 /*
677  * return a pointer to the page in the filemap that contains the given bit
678  *
679  * this lookup is complicated by the fact that the bitmap sb might be exactly
680  * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
681  * 0 or page 1
682  */
683 static inline struct page *filemap_get_page(struct bitmap_storage *store,
684                                             unsigned long chunk)
685 {
686         if (file_page_index(store, chunk) >= store->file_pages)
687                 return NULL;
688         return store->filemap[file_page_index(store, chunk)
689                               - file_page_index(store, 0)];
690 }
691
692 static int bitmap_storage_alloc(struct bitmap_storage *store,
693                                 unsigned long chunks, int with_super)
694 {
695         int pnum;
696         unsigned long num_pages;
697         unsigned long bytes;
698
699         bytes = DIV_ROUND_UP(chunks, 8);
700         if (with_super)
701                 bytes += sizeof(bitmap_super_t);
702
703         num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
704
705         store->filemap = kmalloc(sizeof(struct page *)
706                                  * num_pages, GFP_KERNEL);
707         if (!store->filemap)
708                 return -ENOMEM;
709
710         if (with_super && !store->sb_page) {
711                 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
712                 if (store->sb_page == NULL)
713                         return -ENOMEM;
714                 store->sb_page->index = 0;
715         }
716         pnum = 0;
717         if (store->sb_page) {
718                 store->filemap[0] = store->sb_page;
719                 pnum = 1;
720         }
721         for ( ; pnum < num_pages; pnum++) {
722                 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
723                 if (!store->filemap[pnum]) {
724                         store->file_pages = pnum;
725                         return -ENOMEM;
726                 }
727                 store->filemap[pnum]->index = pnum;
728         }
729         store->file_pages = pnum;
730
731         /* We need 4 bits per page, rounded up to a multiple
732          * of sizeof(unsigned long) */
733         store->filemap_attr = kzalloc(
734                 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)),
735                 GFP_KERNEL);
736         if (!store->filemap_attr)
737                 return -ENOMEM;
738
739         store->bytes = bytes;
740
741         return 0;
742 }
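/*
 * Sizing example (illustrative, assuming 4K pages): a 1 TiB array with
 * 64 MiB chunks has 16384 chunks -> 2048 bytes of bitmap plus the
 * 256-byte superblock, i.e. a single filemap page; filemap_attr then
 * needs 4 attribute bits for that one page, rounded up to a single
 * unsigned long.
 */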
743
744 static void bitmap_file_unmap(struct bitmap_storage *store)
745 {
746         struct page **map, *sb_page;
747         int pages;
748         struct file *file;
749
750         file = store->file;
751         map = store->filemap;
752         pages = store->file_pages;
753         sb_page = store->sb_page;
754
755         while (pages--)
756                 if (map[pages] != sb_page) /* 0 is sb_page, release it below */
757                         free_buffers(map[pages]);
758         kfree(map);
759         kfree(store->filemap_attr);
760
761         if (sb_page)
762                 free_buffers(sb_page);
763
764         if (file) {
765                 struct inode *inode = file->f_path.dentry->d_inode;
766                 invalidate_mapping_pages(inode->i_mapping, 0, -1);
767                 fput(file);
768         }
769 }
770
771 /*
772  * bitmap_file_kick - if an error occurs while manipulating the bitmap file
773  * then it is no longer reliable, so we stop using it and we mark the file
774  * as failed in the superblock
775  */
776 static void bitmap_file_kick(struct bitmap *bitmap)
777 {
778         char *path, *ptr = NULL;
779
780         if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) {
781                 bitmap_update_sb(bitmap);
782
783                 if (bitmap->storage.file) {
784                         path = kmalloc(PAGE_SIZE, GFP_KERNEL);
785                         if (path)
786                                 ptr = d_path(&bitmap->storage.file->f_path,
787                                              path, PAGE_SIZE);
788
789                         printk(KERN_ALERT
790                               "%s: kicking failed bitmap file %s from array!\n",
791                               bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
792
793                         kfree(path);
794                 } else
795                         printk(KERN_ALERT
796                                "%s: disabling internal bitmap due to errors\n",
797                                bmname(bitmap));
798         }
799 }
800
801 enum bitmap_page_attr {
802         BITMAP_PAGE_DIRTY = 0,     /* there are set bits that need to be synced */
803         BITMAP_PAGE_PENDING = 1,   /* there are bits that are being cleaned.
804                                     * i.e. counter is 1 or 2. */
805         BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */
806 };
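/*
 * Each filemap page gets a 4-bit slot in storage->filemap_attr (one bit
 * per attribute above plus one spare), hence the (pnum << 2) + attr
 * indexing in the helpers below.
 */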
807
808 static inline void set_page_attr(struct bitmap *bitmap, int pnum,
809                                  enum bitmap_page_attr attr)
810 {
811         set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
812 }
813
814 static inline void clear_page_attr(struct bitmap *bitmap, int pnum,
815                                    enum bitmap_page_attr attr)
816 {
817         clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
818 }
819
820 static inline int test_page_attr(struct bitmap *bitmap, int pnum,
821                                  enum bitmap_page_attr attr)
822 {
823         return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr);
824 }
825
826 static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum,
827                                            enum bitmap_page_attr attr)
828 {
829         return test_and_clear_bit((pnum<<2) + attr,
830                                   bitmap->storage.filemap_attr);
831 }
832 /*
833  * bitmap_file_set_bit -- called before performing a write to the md device
834  * to set (and eventually sync) a particular bit in the bitmap file
835  *
836  * we set the bit immediately, then we record the page number so that
837  * when an unplug occurs, we can flush the dirty pages out to disk
838  */
839 static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
840 {
841         unsigned long bit;
842         struct page *page;
843         void *kaddr;
844         unsigned long chunk = block >> bitmap->counts.chunkshift;
845
846         page = filemap_get_page(&bitmap->storage, chunk);
847         if (!page)
848                 return;
849         bit = file_page_offset(&bitmap->storage, chunk);
850
851         /* set the bit */
852         kaddr = kmap_atomic(page);
853         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
854                 set_bit(bit, kaddr);
855         else
856                 test_and_set_bit_le(bit, kaddr);
857         kunmap_atomic(kaddr);
858         pr_debug("set file bit %lu page %lu\n", bit, page->index);
859         /* record page number so it gets flushed to disk when unplug occurs */
860         set_page_attr(bitmap, page->index, BITMAP_PAGE_DIRTY);
861 }
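/*
 * Unless BITMAP_HOSTENDIAN is set (only for the old host-endian
 * superblock format, see bitmap_read_sb), bits in the file are stored in
 * little-endian bit order via the *_bit_le() helpers, so an on-disk
 * bitmap written on one architecture remains valid on another.
 */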
862
863 static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
864 {
865         unsigned long bit;
866         struct page *page;
867         void *paddr;
868         unsigned long chunk = block >> bitmap->counts.chunkshift;
869
870         page = filemap_get_page(&bitmap->storage, chunk);
871         if (!page)
872                 return;
873         bit = file_page_offset(&bitmap->storage, chunk);
874         paddr = kmap_atomic(page);
875         if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
876                 clear_bit(bit, paddr);
877         else
878                 test_and_clear_bit_le(bit, paddr);
879         kunmap_atomic(paddr);
880         if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
881                 set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
882                 bitmap->allclean = 0;
883         }
884 }
885
886 /* this gets called when the md device is ready to unplug its underlying
887  * (slave) device queues -- before we let any writes go down, we need to
888  * sync the dirty pages of the bitmap file to disk */
889 void bitmap_unplug(struct bitmap *bitmap)
890 {
891         unsigned long i;
892         int dirty, need_write;
893         int wait = 0;
894
895         if (!bitmap || !bitmap->storage.filemap ||
896             test_bit(BITMAP_STALE, &bitmap->flags))
897                 return;
898
899         /* look at each page to see if there are any set bits that need to be
900          * flushed out to disk */
901         for (i = 0; i < bitmap->storage.file_pages; i++) {
902                 if (!bitmap->storage.filemap)
903                         return;
904                 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
905                 need_write = test_and_clear_page_attr(bitmap, i,
906                                                       BITMAP_PAGE_NEEDWRITE);
907                 if (dirty || need_write) {
908                         clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
909                         write_page(bitmap, bitmap->storage.filemap[i], 0);
910                 }
911                 if (dirty)
912                         wait = 1;
913         }
914         if (wait) { /* if any writes were performed, we need to wait on them */
915                 if (bitmap->storage.file)
916                         wait_event(bitmap->write_wait,
917                                    atomic_read(&bitmap->pending_writes)==0);
918                 else
919                         md_super_wait(bitmap->mddev);
920         }
921         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
922                 bitmap_file_kick(bitmap);
923 }
924 EXPORT_SYMBOL(bitmap_unplug);
925
926 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
927 /* bitmap_init_from_disk -- called at bitmap_create time to initialize
928  * the in-memory bitmap from the on-disk bitmap -- also, sets up the
929  * memory mapping of the bitmap file
930  * Special cases:
931  *   if there's no bitmap file, or if the bitmap file had been
932  *   previously kicked from the array, we mark all the bits as
933  *   1's in order to cause a full resync.
934  *
935  * We ignore all bits for sectors that end earlier than 'start'.
936  * This is used when reading an out-of-date bitmap...
937  */
938 static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
939 {
940         unsigned long i, chunks, index, oldindex, bit;
941         struct page *page = NULL;
942         unsigned long bit_cnt = 0;
943         struct file *file;
944         unsigned long offset;
945         int outofdate;
946         int ret = -ENOSPC;
947         void *paddr;
948         struct bitmap_storage *store = &bitmap->storage;
949
950         chunks = bitmap->counts.chunks;
951         file = store->file;
952
953         if (!file && !bitmap->mddev->bitmap_info.offset) {
954                 /* No permanent bitmap - fill with '1s'. */
955                 store->filemap = NULL;
956                 store->file_pages = 0;
957                 for (i = 0; i < chunks ; i++) {
958                         /* if the disk bit is set, set the memory bit */
959                         int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift)
960                                       >= start);
961                         bitmap_set_memory_bits(bitmap,
962                                                (sector_t)i << bitmap->counts.chunkshift,
963                                                needed);
964                 }
965                 return 0;
966         }
967
968         outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
969         if (outofdate)
970                 printk(KERN_INFO "%s: bitmap file is out of date, doing full "
971                         "recovery\n", bmname(bitmap));
972
973         if (file && i_size_read(file->f_mapping->host) < store->bytes) {
974                 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
975                        bmname(bitmap),
976                        (unsigned long) i_size_read(file->f_mapping->host),
977                        store->bytes);
978                 goto err;
979         }
980
981         oldindex = ~0L;
982         offset = 0;
983         if (!bitmap->mddev->bitmap_info.external)
984                 offset = sizeof(bitmap_super_t);
985
986         for (i = 0; i < chunks; i++) {
987                 int b;
988                 index = file_page_index(&bitmap->storage, i);
989                 bit = file_page_offset(&bitmap->storage, i);
990                 if (index != oldindex) { /* this is a new page, read it in */
991                         int count;
992                         /* unmap the old page, we're done with it */
993                         if (index == store->file_pages-1)
994                                 count = store->bytes - index * PAGE_SIZE;
995                         else
996                                 count = PAGE_SIZE;
997                         page = store->filemap[index];
998                         if (file)
999                                 ret = read_page(file, index, bitmap,
1000                                                 count, page);
1001                         else
1002                                 ret = read_sb_page(
1003                                         bitmap->mddev,
1004                                         bitmap->mddev->bitmap_info.offset,
1005                                         page,
1006                                         index, count);
1007
1008                         if (ret)
1009                                 goto err;
1010
1011                         oldindex = index;
1012
1013                         if (outofdate) {
1014                                 /*
1015                                  * if bitmap is out of date, dirty the
1016                                  * whole page and write it out
1017                                  */
1018                                 paddr = kmap_atomic(page);
1019                                 memset(paddr + offset, 0xff,
1020                                        PAGE_SIZE - offset);
1021                                 kunmap_atomic(paddr);
1022                                 write_page(bitmap, page, 1);
1023
1024                                 ret = -EIO;
1025                                 if (test_bit(BITMAP_WRITE_ERROR,
1026                                              &bitmap->flags))
1027                                         goto err;
1028                         }
1029                 }
1030                 paddr = kmap_atomic(page);
1031                 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
1032                         b = test_bit(bit, paddr);
1033                 else
1034                         b = test_bit_le(bit, paddr);
1035                 kunmap_atomic(paddr);
1036                 if (b) {
1037                         /* if the disk bit is set, set the memory bit */
1038                         int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift
1039                                       >= start);
1040                         bitmap_set_memory_bits(bitmap,
1041                                                (sector_t)i << bitmap->counts.chunkshift,
1042                                                needed);
1043                         bit_cnt++;
1044                 }
1045                 offset = 0;
1046         }
1047
1048         printk(KERN_INFO "%s: bitmap initialized from disk: "
1049                "read %lu pages, set %lu of %lu bits\n",
1050                bmname(bitmap), store->file_pages,
1051                bit_cnt, chunks);
1052
1053         return 0;
1054
1055  err:
1056         printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
1057                bmname(bitmap), ret);
1058         return ret;
1059 }
1060
1061 void bitmap_write_all(struct bitmap *bitmap)
1062 {
1063         /* We don't actually write all bitmap blocks here,
1064          * just flag them as needing to be written
1065          */
1066         int i;
1067
1068         if (!bitmap || !bitmap->storage.filemap)
1069                 return;
1070         if (bitmap->storage.file)
1071                 /* Only one copy, so nothing needed */
1072                 return;
1073
1074         for (i = 0; i < bitmap->storage.file_pages; i++)
1075                 set_page_attr(bitmap, i,
1076                               BITMAP_PAGE_NEEDWRITE);
1077         bitmap->allclean = 0;
1078 }
1079
1080 static void bitmap_count_page(struct bitmap_counts *bitmap,
1081                               sector_t offset, int inc)
1082 {
1083         sector_t chunk = offset >> bitmap->chunkshift;
1084         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1085         bitmap->bp[page].count += inc;
1086         bitmap_checkfree(bitmap, page);
1087 }
1088
1089 static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset)
1090 {
1091         sector_t chunk = offset >> bitmap->chunkshift;
1092         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1093         struct bitmap_page *bp = &bitmap->bp[page];
1094
1095         if (!bp->pending)
1096                 bp->pending = 1;
1097 }
1098
1099 static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1100                                             sector_t offset, sector_t *blocks,
1101                                             int create);
1102
1103 /*
1104  * bitmap daemon -- periodically wakes up to clean bits and flush pages
1105  *                      out to disk
1106  */
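/*
 * When a pass finds nothing to do (bitmap->allclean stays set) the md
 * thread's timeout is pushed out to MAX_SCHEDULE_TIMEOUT so an idle
 * array does not wake up needlessly; once write activity clears
 * allclean again, the normal daemon_sleep period is restored at the end
 * of the next run.
 */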
1107
1108 void bitmap_daemon_work(struct mddev *mddev)
1109 {
1110         struct bitmap *bitmap;
1111         unsigned long j;
1112         unsigned long nextpage;
1113         sector_t blocks;
1114         struct bitmap_counts *counts;
1115
1116         /* Use a mutex to guard daemon_work against
1117          * bitmap_destroy.
1118          */
1119         mutex_lock(&mddev->bitmap_info.mutex);
1120         bitmap = mddev->bitmap;
1121         if (bitmap == NULL) {
1122                 mutex_unlock(&mddev->bitmap_info.mutex);
1123                 return;
1124         }
1125         if (time_before(jiffies, bitmap->daemon_lastrun
1126                         + mddev->bitmap_info.daemon_sleep))
1127                 goto done;
1128
1129         bitmap->daemon_lastrun = jiffies;
1130         if (bitmap->allclean) {
1131                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1132                 goto done;
1133         }
1134         bitmap->allclean = 1;
1135
1136         /* Any file-page which is PENDING now needs to be written.
1137          * So set NEEDWRITE now, then after we make any last-minute changes
1138          * we will write it.
1139          */
1140         for (j = 0; j < bitmap->storage.file_pages; j++)
1141                 if (test_and_clear_page_attr(bitmap, j,
1142                                              BITMAP_PAGE_PENDING))
1143                         set_page_attr(bitmap, j,
1144                                       BITMAP_PAGE_NEEDWRITE);
1145
1146         if (bitmap->need_sync &&
1147             mddev->bitmap_info.external == 0) {
1148                 /* Arrange for superblock update as well as
1149                  * other changes */
1150                 bitmap_super_t *sb;
1151                 bitmap->need_sync = 0;
1152                 if (bitmap->storage.filemap) {
1153                         sb = kmap_atomic(bitmap->storage.sb_page);
1154                         sb->events_cleared =
1155                                 cpu_to_le64(bitmap->events_cleared);
1156                         kunmap_atomic(sb);
1157                         set_page_attr(bitmap, 0,
1158                                       BITMAP_PAGE_NEEDWRITE);
1159                 }
1160         }
1161         /* Now look at the bitmap counters and if any are '2' or '1',
1162          * decrement and handle accordingly.
1163          */
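        /*
         * Cleaning is deliberately a two-step process: a counter that has
         * dropped to 2 is first decremented to 1 and its page marked
         * pending; only on a later pass, when it is still 1 (no new
         * writes arrived in between), is it cleared to 0 and the on-disk
         * bit cleared.  This delay lets the bitmap absorb rewrites of
         * recently written blocks without extra bitmap I/O.
         */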
1164         counts = &bitmap->counts;
1165         spin_lock_irq(&counts->lock);
1166         nextpage = 0;
1167         for (j = 0; j < counts->chunks; j++) {
1168                 bitmap_counter_t *bmc;
1169                 sector_t  block = (sector_t)j << counts->chunkshift;
1170
1171                 if (j == nextpage) {
1172                         nextpage += PAGE_COUNTER_RATIO;
1173                         if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) {
1174                                 j |= PAGE_COUNTER_MASK;
1175                                 continue;
1176                         }
1177                         counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0;
1178                 }
1179                 bmc = bitmap_get_counter(counts,
1180                                          block,
1181                                          &blocks, 0);
1182
1183                 if (!bmc) {
1184                         j |= PAGE_COUNTER_MASK;
1185                         continue;
1186                 }
1187                 if (*bmc == 1 && !bitmap->need_sync) {
1188                         /* We can clear the bit */
1189                         *bmc = 0;
1190                         bitmap_count_page(counts, block, -1);
1191                         bitmap_file_clear_bit(bitmap, block);
1192                 } else if (*bmc && *bmc <= 2) {
1193                         *bmc = 1;
1194                         bitmap_set_pending(counts, block);
1195                         bitmap->allclean = 0;
1196                 }
1197         }
1198         spin_unlock_irq(&counts->lock);
1199
1200         /* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
1201          * DIRTY pages need to be written by bitmap_unplug so it can wait
1202          * for them.
1203          * If we find any DIRTY page we stop there and let bitmap_unplug
1204          * handle all the rest.  This is important in the case where
1205          * the first block holds the superblock and it has been updated.
1206          * We mustn't write any other blocks before the superblock.
1207          */
1208         for (j = 0;
1209              j < bitmap->storage.file_pages
1210                      && !test_bit(BITMAP_STALE, &bitmap->flags);
1211              j++) {
1212
1213                 if (test_page_attr(bitmap, j,
1214                                    BITMAP_PAGE_DIRTY))
1215                         /* bitmap_unplug will handle the rest */
1216                         break;
1217                 if (test_and_clear_page_attr(bitmap, j,
1218                                              BITMAP_PAGE_NEEDWRITE)) {
1219                         write_page(bitmap, bitmap->storage.filemap[j], 0);
1220                 }
1221         }
1222
1223  done:
1224         if (bitmap->allclean == 0)
1225                 mddev->thread->timeout =
1226                         mddev->bitmap_info.daemon_sleep;
1227         mutex_unlock(&mddev->bitmap_info.mutex);
1228 }
1229
1230 static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap,
1231                                             sector_t offset, sector_t *blocks,
1232                                             int create)
1233 __releases(bitmap->lock)
1234 __acquires(bitmap->lock)
1235 {
1236         /* If 'create', we might release the lock and reclaim it.
1237          * The lock must have been taken with interrupts enabled.
1238          * If !create, we don't release the lock.
1239          */
1240         sector_t chunk = offset >> bitmap->chunkshift;
1241         unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1242         unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1243         sector_t csize;
1244         int err;
1245
1246         err = bitmap_checkpage(bitmap, page, create);
1247
1248         if (bitmap->bp[page].hijacked ||
1249             bitmap->bp[page].map == NULL)
1250                 csize = ((sector_t)1) << (bitmap->chunkshift +
1251                                           PAGE_COUNTER_SHIFT - 1);
1252         else
1253                 csize = ((sector_t)1) << bitmap->chunkshift;
1254         *blocks = csize - (offset & (csize - 1));
1255
1256         if (err < 0)
1257                 return NULL;
1258
1259         /* now locked ... */
1260
1261         if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1262                 /* should we use the first or second counter field
1263                  * of the hijacked pointer? */
1264                 int hi = (pageoff > PAGE_COUNTER_MASK);
1265                 return  &((bitmap_counter_t *)
1266                           &bitmap->bp[page].map)[hi];
1267         } else /* page is allocated */
1268                 return (bitmap_counter_t *)
1269                         &(bitmap->bp[page].map[pageoff]);
1270 }
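/*
 * Each bitmap_counter_t (see bitmap.h) keeps the NEEDED and RESYNC flags
 * in its top bits and uses the remaining bits to count writes in flight
 * for the chunk, capped at COUNTER_MAX (bitmap_startwrite waits on
 * overflow_wait when the cap is hit).  When a counter page has been
 * hijacked, only the two counters stored in the pointer field exist, so
 * each covers half of the page's normal range -- which is why csize
 * above uses chunkshift + PAGE_COUNTER_SHIFT - 1 in that case.
 */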
1271
1272 int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
1273 {
1274         if (!bitmap)
1275                 return 0;
1276
1277         if (behind) {
1278                 int bw;
1279                 atomic_inc(&bitmap->behind_writes);
1280                 bw = atomic_read(&bitmap->behind_writes);
1281                 if (bw > bitmap->behind_writes_used)
1282                         bitmap->behind_writes_used = bw;
1283
1284                 pr_debug("inc write-behind count %d/%lu\n",
1285                          bw, bitmap->mddev->bitmap_info.max_write_behind);
1286         }
1287
1288         while (sectors) {
1289                 sector_t blocks;
1290                 bitmap_counter_t *bmc;
1291
1292                 spin_lock_irq(&bitmap->counts.lock);
1293                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1);
1294                 if (!bmc) {
1295                         spin_unlock_irq(&bitmap->counts.lock);
1296                         return 0;
1297                 }
1298
1299                 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) {
1300                         DEFINE_WAIT(__wait);
1301                         /* note that it is safe to do the prepare_to_wait
1302                          * after the test as long as we do it before dropping
1303                          * the spinlock.
1304                          */
1305                         prepare_to_wait(&bitmap->overflow_wait, &__wait,
1306                                         TASK_UNINTERRUPTIBLE);
1307                         spin_unlock_irq(&bitmap->counts.lock);
1308                         io_schedule();
1309                         finish_wait(&bitmap->overflow_wait, &__wait);
1310                         continue;
1311                 }
1312
1313                 switch (*bmc) {
1314                 case 0:
1315                         bitmap_file_set_bit(bitmap, offset);
1316                         bitmap_count_page(&bitmap->counts, offset, 1);
1317                         /* fall through */
1318                 case 1:
1319                         *bmc = 2;
1320                 }
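                /*
                 * Net effect: a chunk with writes in flight always has a
                 * counter of at least 3 after the increment below, so the
                 * values 1 and 2 are reserved to mean "dirty on disk but
                 * no active writes" -- exactly the states the daemon
                 * decrements and eventually clears.
                 */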
1321
1322                 (*bmc)++;
1323
1324                 spin_unlock_irq(&bitmap->counts.lock);
1325
1326                 offset += blocks;
1327                 if (sectors > blocks)
1328                         sectors -= blocks;
1329                 else
1330                         sectors = 0;
1331         }
1332         return 0;
1333 }
1334 EXPORT_SYMBOL(bitmap_startwrite);
1335
1336 void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1337                      int success, int behind)
1338 {
1339         if (!bitmap)
1340                 return;
1341         if (behind) {
1342                 if (atomic_dec_and_test(&bitmap->behind_writes))
1343                         wake_up(&bitmap->behind_wait);
1344                 pr_debug("dec write-behind count %d/%lu\n",
1345                          atomic_read(&bitmap->behind_writes),
1346                          bitmap->mddev->bitmap_info.max_write_behind);
1347         }
1348
1349         while (sectors) {
1350                 sector_t blocks;
1351                 unsigned long flags;
1352                 bitmap_counter_t *bmc;
1353
1354                 spin_lock_irqsave(&bitmap->counts.lock, flags);
1355                 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0);
1356                 if (!bmc) {
1357                         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1358                         return;
1359                 }
1360
1361                 if (success && !bitmap->mddev->degraded &&
1362                     bitmap->events_cleared < bitmap->mddev->events) {
1363                         bitmap->events_cleared = bitmap->mddev->events;
1364                         bitmap->need_sync = 1;
1365                         sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
1366                 }
1367
1368                 if (!success && !NEEDED(*bmc))
1369                         *bmc |= NEEDED_MASK;
1370
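		/*
		 * Writers that found the counter saturated are sleeping in
		 * bitmap_startwrite() on overflow_wait; wake them now that
		 * the counter is about to be decremented.
		 */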
1371                 if (COUNTER(*bmc) == COUNTER_MAX)
1372                         wake_up(&bitmap->overflow_wait);
1373
1374                 (*bmc)--;
1375                 if (*bmc <= 2) {
1376                         bitmap_set_pending(&bitmap->counts, offset);
1377                         bitmap->allclean = 0;
1378                 }
1379                 spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1380                 offset += blocks;
1381                 if (sectors > blocks)
1382                         sectors -= blocks;
1383                 else
1384                         sectors = 0;
1385         }
1386 }
1387 EXPORT_SYMBOL(bitmap_endwrite);
1388
1389 static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1390                                int degraded)
1391 {
1392         bitmap_counter_t *bmc;
1393         int rv;
1394         if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1395                 *blocks = 1024;
1396                 return 1; /* always resync if no bitmap */
1397         }
1398         spin_lock_irq(&bitmap->counts.lock);
1399         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1400         rv = 0;
1401         if (bmc) {
1402                 /* locked */
1403                 if (RESYNC(*bmc))
1404                         rv = 1;
1405                 else if (NEEDED(*bmc)) {
1406                         rv = 1;
1407                         if (!degraded) { /* don't set/clear bits if degraded */
1408                                 *bmc |= RESYNC_MASK;
1409                                 *bmc &= ~NEEDED_MASK;
1410                         }
1411                 }
1412         }
1413         spin_unlock_irq(&bitmap->counts.lock);
1414         return rv;
1415 }
1416
1417 int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
1418                       int degraded)
1419 {
1420         /* bitmap_start_sync must always report on multiples of whole
1421          * pages, otherwise resync (which is very PAGE_SIZE based) will
1422          * get confused.
1423          * So call __bitmap_start_sync repeatedly (if needed) until
1424                  * at least PAGE_SIZE>>9 blocks are covered.
1425                  * Return the 'or' of the results.
1426          */
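	/* With 512-byte sectors and 4KiB pages, PAGE_SIZE>>9 is 8 blocks. */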
1427         int rv = 0;
1428         sector_t blocks1;
1429
1430         *blocks = 0;
1431         while (*blocks < (PAGE_SIZE>>9)) {
1432                 rv |= __bitmap_start_sync(bitmap, offset,
1433                                           &blocks1, degraded);
1434                 offset += blocks1;
1435                 *blocks += blocks1;
1436         }
1437         return rv;
1438 }
1439 EXPORT_SYMBOL(bitmap_start_sync);
1440
1441 void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted)
1442 {
1443         bitmap_counter_t *bmc;
1444         unsigned long flags;
1445
1446         if (bitmap == NULL) {
1447                 *blocks = 1024;
1448                 return;
1449         }
1450         spin_lock_irqsave(&bitmap->counts.lock, flags);
1451         bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0);
1452         if (bmc == NULL)
1453                 goto unlock;
1454         /* locked */
1455         if (RESYNC(*bmc)) {
1456                 *bmc &= ~RESYNC_MASK;
1457
1458                 if (!NEEDED(*bmc) && aborted)
1459                         *bmc |= NEEDED_MASK;
1460                 else {
1461                         if (*bmc <= 2) {
1462                                 bitmap_set_pending(&bitmap->counts, offset);
1463                                 bitmap->allclean = 0;
1464                         }
1465                 }
1466         }
1467  unlock:
1468         spin_unlock_irqrestore(&bitmap->counts.lock, flags);
1469 }
1470 EXPORT_SYMBOL(bitmap_end_sync);
1471
1472 void bitmap_close_sync(struct bitmap *bitmap)
1473 {
1474         /* Sync has finished, and any bitmap chunks that weren't synced
1475          * properly have been aborted.  It remains to us to clear the
1476          * RESYNC bit wherever it is still on
1477          */
1478         sector_t sector = 0;
1479         sector_t blocks;
1480         if (!bitmap)
1481                 return;
1482         while (sector < bitmap->mddev->resync_max_sectors) {
1483                 bitmap_end_sync(bitmap, sector, &blocks, 0);
1484                 sector += blocks;
1485         }
1486 }
1487 EXPORT_SYMBOL(bitmap_close_sync);
1488
1489 void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
1490 {
1491         sector_t s = 0;
1492         sector_t blocks;
1493
1494         if (!bitmap)
1495                 return;
1496         if (sector == 0) {
1497                 bitmap->last_end_sync = jiffies;
1498                 return;
1499         }
1500         if (time_before(jiffies, (bitmap->last_end_sync
1501                                   + bitmap->mddev->bitmap_info.daemon_sleep)))
1502                 return;
1503         wait_event(bitmap->mddev->recovery_wait,
1504                    atomic_read(&bitmap->mddev->recovery_active) == 0);
1505
1506         bitmap->mddev->curr_resync_completed = sector;
1507         set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
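	/*
	 * Round 'sector' down to a chunk boundary so that only whole
	 * bitmap chunks are marked as synced by the loop below.
	 */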
1508         sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
1509         s = 0;
1510         while (s < sector && s < bitmap->mddev->resync_max_sectors) {
1511                 bitmap_end_sync(bitmap, s, &blocks, 0);
1512                 s += blocks;
1513         }
1514         bitmap->last_end_sync = jiffies;
1515         sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed");
1516 }
1517 EXPORT_SYMBOL(bitmap_cond_end_sync);
1518
1519 static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
1520 {
1521         /* For each chunk covered by any of these sectors, set the
1522          * counter to 2 and possibly set resync_needed.  They should all
1523          * be 0 at this point
1524          */
1525
1526         sector_t secs;
1527         bitmap_counter_t *bmc;
1528         spin_lock_irq(&bitmap->counts.lock);
1529         bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1);
1530         if (!bmc) {
1531                 spin_unlock_irq(&bitmap->counts.lock);
1532                 return;
1533         }
1534         if (!*bmc) {
1535                 *bmc = 2 | (needed ? NEEDED_MASK : 0);
1536                 bitmap_count_page(&bitmap->counts, offset, 1);
1537                 bitmap_set_pending(&bitmap->counts, offset);
1538                 bitmap->allclean = 0;
1539         }
1540         spin_unlock_irq(&bitmap->counts.lock);
1541 }
1542
1543 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
1544 void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1545 {
1546         unsigned long chunk;
1547
1548         for (chunk = s; chunk <= e; chunk++) {
1549                 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift;
1550                 bitmap_set_memory_bits(bitmap, sec, 1);
1551                 bitmap_file_set_bit(bitmap, sec);
1552                 if (sec < bitmap->mddev->recovery_cp)
1553                         /* We are asserting that the array is dirty,
1554                          * so move the recovery_cp address back so
1555                          * that it is obvious that it is dirty
1556                          */
1557                         bitmap->mddev->recovery_cp = sec;
1558         }
1559 }
1560
1561 /*
1562  * flush out any pending updates
1563  */
1564 void bitmap_flush(struct mddev *mddev)
1565 {
1566         struct bitmap *bitmap = mddev->bitmap;
1567         long sleep;
1568
1569         if (!bitmap) /* there was no bitmap */
1570                 return;
1571
1572         /* run the daemon_work three times to ensure that everything
1573          * which can be flushed has been flushed
1574          */
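	/*
	 * Backdating daemon_lastrun by more than daemon_sleep makes each
	 * bitmap_daemon_work() call below do real work instead of deciding
	 * it is too early; bit clearing happens in stages across successive
	 * passes, which is presumably why three calls are made.
	 */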
1575         sleep = mddev->bitmap_info.daemon_sleep * 2;
1576         bitmap->daemon_lastrun -= sleep;
1577         bitmap_daemon_work(mddev);
1578         bitmap->daemon_lastrun -= sleep;
1579         bitmap_daemon_work(mddev);
1580         bitmap->daemon_lastrun -= sleep;
1581         bitmap_daemon_work(mddev);
1582         bitmap_update_sb(bitmap);
1583 }
1584
1585 /*
1586  * free memory that was allocated
1587  */
1588 static void bitmap_free(struct bitmap *bitmap)
1589 {
1590         unsigned long k, pages;
1591         struct bitmap_page *bp;
1592
1593         if (!bitmap) /* there was no bitmap */
1594                 return;
1595
1596         /* Shouldn't be needed - but just in case.... */
1597         wait_event(bitmap->write_wait,
1598                    atomic_read(&bitmap->pending_writes) == 0);
1599
1600         /* release the bitmap file  */
1601         bitmap_file_unmap(&bitmap->storage);
1602
1603         bp = bitmap->counts.bp;
1604         pages = bitmap->counts.pages;
1605
1606         /* free all allocated memory */
1607
1608         if (bp) /* deallocate the page memory */
1609                 for (k = 0; k < pages; k++)
1610                         if (bp[k].map && !bp[k].hijacked)
1611                                 kfree(bp[k].map);
1612         kfree(bp);
1613         kfree(bitmap);
1614 }
1615
1616 void bitmap_destroy(struct mddev *mddev)
1617 {
1618         struct bitmap *bitmap = mddev->bitmap;
1619
1620         if (!bitmap) /* there was no bitmap */
1621                 return;
1622
1623         mutex_lock(&mddev->bitmap_info.mutex);
1624         mddev->bitmap = NULL; /* disconnect from the md device */
1625         mutex_unlock(&mddev->bitmap_info.mutex);
1626         if (mddev->thread)
1627                 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
1628
1629         if (bitmap->sysfs_can_clear)
1630                 sysfs_put(bitmap->sysfs_can_clear);
1631
1632         bitmap_free(bitmap);
1633 }
1634
1635 /*
1636  * initialize the bitmap structure
1637  * if this returns an error, bitmap_destroy must be called to do clean up
1638  */
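/*
 * Illustrative call sequence (a sketch of how callers use this, mirroring
 * location_store() below rather than quoting md.c verbatim):
 *
 *	err = bitmap_create(mddev);
 *	if (!err)
 *		err = bitmap_load(mddev);
 *	if (err)
 *		bitmap_destroy(mddev);
 */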
1639 int bitmap_create(struct mddev *mddev)
1640 {
1641         struct bitmap *bitmap;
1642         sector_t blocks = mddev->resync_max_sectors;
1643         struct file *file = mddev->bitmap_info.file;
1644         int err;
1645         struct sysfs_dirent *bm = NULL;
1646
1647         BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);
1648
1649         BUG_ON(file && mddev->bitmap_info.offset);
1650
1651         bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
1652         if (!bitmap)
1653                 return -ENOMEM;
1654
1655         spin_lock_init(&bitmap->counts.lock);
1656         atomic_set(&bitmap->pending_writes, 0);
1657         init_waitqueue_head(&bitmap->write_wait);
1658         init_waitqueue_head(&bitmap->overflow_wait);
1659         init_waitqueue_head(&bitmap->behind_wait);
1660
1661         bitmap->mddev = mddev;
1662
1663         if (mddev->kobj.sd)
1664                 bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap");
1665         if (bm) {
1666                 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear");
1667                 sysfs_put(bm);
1668         } else
1669                 bitmap->sysfs_can_clear = NULL;
1670
1671         bitmap->storage.file = file;
1672         if (file) {
1673                 get_file(file);
1674                 /* As future accesses to this file will use bmap,
1675                  * and bypass the page cache, we must sync the file
1676                  * first.
1677                  */
1678                 vfs_fsync(file, 1);
1679         }
1680         /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */
1681         if (!mddev->bitmap_info.external) {
1682                 /*
1683                  * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is
1684                  * instructing us to create a new on-disk bitmap instance.
1685                  */
1686                 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags))
1687                         err = bitmap_new_disk_sb(bitmap);
1688                 else
1689                         err = bitmap_read_sb(bitmap);
1690         } else {
1691                 err = 0;
1692                 if (mddev->bitmap_info.chunksize == 0 ||
1693                     mddev->bitmap_info.daemon_sleep == 0)
1694                         /* chunksize and time_base need to be
1695                          * set first. */
1696                         err = -EINVAL;
1697         }
1698         if (err)
1699                 goto error;
1700
1701         bitmap->daemon_lastrun = jiffies;
1702         err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1);
1703         if (err)
1704                 goto error;
1705
1706         printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1707                bitmap->counts.pages, bmname(bitmap));
1708
1709         mddev->bitmap = bitmap;
1710         return test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
1711
1712  error:
1713         bitmap_free(bitmap);
1714         return err;
1715 }
1716
1717 int bitmap_load(struct mddev *mddev)
1718 {
1719         int err = 0;
1720         sector_t start = 0;
1721         sector_t sector = 0;
1722         struct bitmap *bitmap = mddev->bitmap;
1723
1724         if (!bitmap)
1725                 goto out;
1726
1727         /* Clear out old bitmap info first:  Either there is none, or we
1728          * are resuming after someone else has possibly changed things,
1729          * so we should forget old cached info.
1730          * All chunks should be clean, but some might need_sync.
1731          */
1732         while (sector < mddev->resync_max_sectors) {
1733                 sector_t blocks;
1734                 bitmap_start_sync(bitmap, sector, &blocks, 0);
1735                 sector += blocks;
1736         }
1737         bitmap_close_sync(bitmap);
1738
1739         if (mddev->degraded == 0
1740             || bitmap->events_cleared == mddev->events)
1741                 /* no need to keep dirty bits to optimise a
1742                  * re-add of a missing device */
1743                 start = mddev->recovery_cp;
1744
1745         mutex_lock(&mddev->bitmap_info.mutex);
1746         err = bitmap_init_from_disk(bitmap, start);
1747         mutex_unlock(&mddev->bitmap_info.mutex);
1748
1749         if (err)
1750                 goto out;
1751         clear_bit(BITMAP_STALE, &bitmap->flags);
1752
1753         /* Kick recovery in case any bits were set */
1754         set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
1755
1756         mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
1757         md_wakeup_thread(mddev->thread);
1758
1759         bitmap_update_sb(bitmap);
1760
1761         if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
1762                 err = -EIO;
1763 out:
1764         return err;
1765 }
1766 EXPORT_SYMBOL_GPL(bitmap_load);
1767
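/*
 * Example of the line this contributes to /proc/mdstat (illustrative
 * values only):
 *
 *   bitmap: 3/15 pages [12KB], 65536KB chunk, file: /bitmaps/md0.bm
 */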
1768 void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
1769 {
1770         unsigned long chunk_kb;
1771         struct bitmap_counts *counts;
1772
1773         if (!bitmap)
1774                 return;
1775
1776         counts = &bitmap->counts;
1777
1778         chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10;
1779         seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
1780                    "%lu%s chunk",
1781                    counts->pages - counts->missing_pages,
1782                    counts->pages,
1783                    (counts->pages - counts->missing_pages)
1784                    << (PAGE_SHIFT - 10),
1785                    chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize,
1786                    chunk_kb ? "KB" : "B");
1787         if (bitmap->storage.file) {
1788                 seq_printf(seq, ", file: ");
1789                 seq_path(seq, &bitmap->storage.file->f_path, " \t\n");
1790         }
1791
1792         seq_printf(seq, "\n");
1793 }
1794
1795 int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
1796                   int chunksize, int init)
1797 {
1798         /* If chunksize is 0, choose an appropriate chunk size.
1799          * Then possibly allocate new storage space.
1800          * Then quiesce, copy bits, replace bitmap, and re-start
1801          *
1802          * This function is called both to set up the initial bitmap
1803          * and to resize the bitmap while the array is active.
1804          * If this happens as a result of the array being resized,
1805          * chunksize will be zero, and we need to choose a suitable
1806          * chunksize, otherwise we use what we are given.
1807          */
1808         struct bitmap_storage store;
1809         struct bitmap_counts old_counts;
1810         unsigned long chunks;
1811         sector_t block;
1812         sector_t old_blocks, new_blocks;
1813         int chunkshift;
1814         int ret = 0;
1815         long pages;
1816         struct bitmap_page *new_bp;
1817
1818         if (chunksize == 0) {
1819                 /* If there is enough space, leave the chunk size unchanged,
1820                  * else increase it by a factor of two until there is enough space.
1821                  */
1822                 long bytes;
1823                 long space = bitmap->mddev->bitmap_info.space;
1824
1825                 if (space == 0) {
1826                         /* We don't know how much space there is, so limit
1827                          * it to the current size, in sectors.
1828                          */
1829                         bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8);
1830                         if (!bitmap->mddev->bitmap_info.external)
1831                                 bytes += sizeof(bitmap_super_t);
1832                         space = DIV_ROUND_UP(bytes, 512);
1833                         bitmap->mddev->bitmap_info.space = space;
1834                 }
1835                 chunkshift = bitmap->counts.chunkshift;
1836                 chunkshift--;
1837                 do {
1838                         /* 'chunkshift' is shift from block size to chunk size */
1839                         chunkshift++;
1840                         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1841                         bytes = DIV_ROUND_UP(chunks, 8);
1842                         if (!bitmap->mddev->bitmap_info.external)
1843                                 bytes += sizeof(bitmap_super_t);
1844                 } while (bytes > (space << 9));
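		/*
		 * Worked example (illustrative numbers): for an internal
		 * bitmap with blocks == 2^31 sectors (1TiB) and space ==
		 * 128 sectors (64KiB), the loop stops at chunkshift == 13:
		 * 2^31 >> 13 == 262144 chunks need 32768 bytes of bits plus
		 * the 256-byte superblock, which fits in 64KiB, whereas
		 * chunkshift == 12 would need 65792 bytes and does not.
		 * The resulting chunk size is 1 << (13 + 9) == 4MiB.
		 */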
1845         } else
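		/*
		 * For a power-of-two chunksize, ffz(~chunksize) is
		 * log2(chunksize); subtracting BITMAP_BLOCK_SHIFT converts
		 * that from bytes to a shift over 512-byte blocks.
		 */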
1846                 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT;
1847
1848         chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift);
1849         memset(&store, 0, sizeof(store));
1850         if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
1851                 ret = bitmap_storage_alloc(&store, chunks,
1852                                            !bitmap->mddev->bitmap_info.external);
1853         if (ret)
1854                 goto err;
1855
1856         pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
1857
1858         new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL);
1859         ret = -ENOMEM;
1860         if (!new_bp) {
1861                 bitmap_file_unmap(&store);
1862                 goto err;
1863         }
1864
1865         if (!init)
1866                 bitmap->mddev->pers->quiesce(bitmap->mddev, 1);
1867
1868         store.file = bitmap->storage.file;
1869         bitmap->storage.file = NULL;
1870
1871         if (store.sb_page && bitmap->storage.sb_page)
1872                 memcpy(page_address(store.sb_page),
1873                        page_address(bitmap->storage.sb_page),
1874                        sizeof(bitmap_super_t));
1875         bitmap_file_unmap(&bitmap->storage);
1876         bitmap->storage = store;
1877
1878         old_counts = bitmap->counts;
1879         bitmap->counts.bp = new_bp;
1880         bitmap->counts.pages = pages;
1881         bitmap->counts.missing_pages = pages;
1882         bitmap->counts.chunkshift = chunkshift;
1883         bitmap->counts.chunks = chunks;
1884         bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift +
1885                                                      BITMAP_BLOCK_SHIFT);
1886
1887         blocks = min(old_counts.chunks << old_counts.chunkshift,
1888                      chunks << chunkshift);
1889
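	/*
	 * Copy the dirty (NEEDED) state from the old counters into the new
	 * ones over the region both layouts cover; any newly added space is
	 * marked for resync further below.
	 */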
1890         spin_lock_irq(&bitmap->counts.lock);
1891         for (block = 0; block < blocks; ) {
1892                 bitmap_counter_t *bmc_old, *bmc_new;
1893                 int set;
1894
1895                 bmc_old = bitmap_get_counter(&old_counts, block,
1896                                              &old_blocks, 0);
1897                 set = bmc_old && NEEDED(*bmc_old);
1898
1899                 if (set) {
1900                         bmc_new = bitmap_get_counter(&bitmap->counts, block,
1901                                                      &new_blocks, 1);
1902                         if (*bmc_new == 0) {
1903                                 /* need to set on-disk bits too. */
1904                                 sector_t end = block + new_blocks;
1905                                 sector_t start = block >> chunkshift;
1906                                 start <<= chunkshift;
1907                                 while (start < end) {
1908                                         bitmap_file_set_bit(bitmap, block);
1909                                         start += 1 << chunkshift;
1910                                 }
1911                                 *bmc_new = 2;
1912                                 bitmap_count_page(&bitmap->counts,
1913                                                   block, 1);
1914                                 bitmap_set_pending(&bitmap->counts,
1915                                                    block);
1916                         }
1917                         *bmc_new |= NEEDED_MASK;
1918                         if (new_blocks < old_blocks)
1919                                 old_blocks = new_blocks;
1920                 }
1921                 block += old_blocks;
1922         }
1923
1924         if (!init) {
1925                 int i;
1926                 while (block < (chunks << chunkshift)) {
1927                         bitmap_counter_t *bmc;
1928                         bmc = bitmap_get_counter(&bitmap->counts, block,
1929                                                  &new_blocks, 1);
1930                         if (bmc) {
1931                                 /* new space.  It needs to be resynced, so
1932                                  * we set NEEDED_MASK.
1933                                  */
1934                                 if (*bmc == 0) {
1935                                         *bmc = NEEDED_MASK | 2;
1936                                         bitmap_count_page(&bitmap->counts,
1937                                                           block, 1);
1938                                         bitmap_set_pending(&bitmap->counts,
1939                                                            block);
1940                                 }
1941                         }
1942                         block += new_blocks;
1943                 }
1944                 for (i = 0; i < bitmap->storage.file_pages; i++)
1945                         set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
1946         }
1947         spin_unlock_irq(&bitmap->counts.lock);
1948
1949         if (!init) {
1950                 bitmap_unplug(bitmap);
1951                 bitmap->mddev->pers->quiesce(bitmap->mddev, 0);
1952         }
1953         ret = 0;
1954 err:
1955         return ret;
1956 }
1957 EXPORT_SYMBOL_GPL(bitmap_resize);
1958
1959 static ssize_t
1960 location_show(struct mddev *mddev, char *page)
1961 {
1962         ssize_t len;
1963         if (mddev->bitmap_info.file)
1964                 len = sprintf(page, "file");
1965         else if (mddev->bitmap_info.offset)
1966                 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset);
1967         else
1968                 len = sprintf(page, "none");
1969         len += sprintf(page+len, "\n");
1970         return len;
1971 }
1972
1973 static ssize_t
1974 location_store(struct mddev *mddev, const char *buf, size_t len)
1975 {
1976
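	/*
	 * Inputs handled below: "none" removes any configured bitmap,
	 * "file:..." is recognised but not yet supported, and a signed
	 * decimal such as "+8" sets the offset for an internal bitmap.
	 */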
1977         if (mddev->pers) {
1978                 if (!mddev->pers->quiesce)
1979                         return -EBUSY;
1980                 if (mddev->recovery || mddev->sync_thread)
1981                         return -EBUSY;
1982         }
1983
1984         if (mddev->bitmap || mddev->bitmap_info.file ||
1985             mddev->bitmap_info.offset) {
1986                 /* bitmap already configured.  Only option is to clear it */
1987                 if (strncmp(buf, "none", 4) != 0)
1988                         return -EBUSY;
1989                 if (mddev->pers) {
1990                         mddev->pers->quiesce(mddev, 1);
1991                         bitmap_destroy(mddev);
1992                         mddev->pers->quiesce(mddev, 0);
1993                 }
1994                 mddev->bitmap_info.offset = 0;
1995                 if (mddev->bitmap_info.file) {
1996                         struct file *f = mddev->bitmap_info.file;
1997                         mddev->bitmap_info.file = NULL;
1998                         restore_bitmap_write_access(f);
1999                         fput(f);
2000                 }
2001         } else {
2002                 /* No bitmap, OK to set a location */
2003                 long long offset;
2004                 if (strncmp(buf, "none", 4) == 0)
2005                         /* nothing to be done */;
2006                 else if (strncmp(buf, "file:", 5) == 0) {
2007                         /* Not supported yet */
2008                         return -EINVAL;
2009                 } else {
2010                         int rv;
2011                         if (buf[0] == '+')
2012                                 rv = strict_strtoll(buf+1, 10, &offset);
2013                         else
2014                                 rv = strict_strtoll(buf, 10, &offset);
2015                         if (rv)
2016                                 return rv;
2017                         if (offset == 0)
2018                                 return -EINVAL;
2019                         if (mddev->bitmap_info.external == 0 &&
2020                             mddev->major_version == 0 &&
2021                             offset != mddev->bitmap_info.default_offset)
2022                                 return -EINVAL;
2023                         mddev->bitmap_info.offset = offset;
2024                         if (mddev->pers) {
2025                                 mddev->pers->quiesce(mddev, 1);
2026                                 rv = bitmap_create(mddev);
2027                                 if (!rv)
2028                                         rv = bitmap_load(mddev);
2029                                 if (rv) {
2030                                         bitmap_destroy(mddev);
2031                                         mddev->bitmap_info.offset = 0;
2032                                 }
2033                                 mddev->pers->quiesce(mddev, 0);
2034                                 if (rv)
2035                                         return rv;
2036                         }
2037                 }
2038         }
2039         if (!mddev->external) {
2040                 /* Ensure new bitmap info is stored in
2041                  * metadata promptly.
2042                  */
2043                 set_bit(MD_CHANGE_DEVS, &mddev->flags);
2044                 md_wakeup_thread(mddev->thread);
2045         }
2046         return len;
2047 }
2048
2049 static struct md_sysfs_entry bitmap_location =
2050 __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store);
2051
2052 /* 'bitmap/space' is the space available at 'location' for the
2053  * bitmap.  This allows the kernel to know when it is safe to
2054  * resize the bitmap to match a resized array.
2055  */
2056 static ssize_t
2057 space_show(struct mddev *mddev, char *page)
2058 {
2059         return sprintf(page, "%lu\n", mddev->bitmap_info.space);
2060 }
2061
2062 static ssize_t
2063 space_store(struct mddev *mddev, const char *buf, size_t len)
2064 {
2065         unsigned long sectors;
2066         int rv;
2067
2068         rv = kstrtoul(buf, 10, &sectors);
2069         if (rv)
2070                 return rv;
2071
2072         if (sectors == 0)
2073                 return -EINVAL;
2074
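	/*
	 * Don't let 'space' shrink below what the current bitmap already
	 * occupies; (bytes + 511) >> 9 rounds the bitmap size up to whole
	 * 512-byte sectors.
	 */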
2075         if (mddev->bitmap &&
2076             sectors < (mddev->bitmap->storage.bytes + 511) >> 9)
2077                 return -EFBIG; /* Bitmap is too big for this small space */
2078
2079         /* could make sure it isn't too big, but that isn't really
2080          * needed - user-space should be careful.
2081          */
2082         mddev->bitmap_info.space = sectors;
2083         return len;
2084 }
2085
2086 static struct md_sysfs_entry bitmap_space =
2087 __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store);
2088
2089 static ssize_t
2090 timeout_show(struct mddev *mddev, char *page)
2091 {
2092         ssize_t len;
2093         unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ;
2094         unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ;
2095
2096         len = sprintf(page, "%lu", secs);
2097         if (jifs)
2098                 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs));
2099         len += sprintf(page+len, "\n");
2100         return len;
2101 }
2102
2103 static ssize_t
2104 timeout_store(struct mddev *mddev, const char *buf, size_t len)
2105 {
2106         /* timeout can be set at any time */
2107         unsigned long timeout;
2108         int rv = strict_strtoul_scaled(buf, &timeout, 4);
2109         if (rv)
2110                 return rv;
2111
2112         /* just to make sure we don't overflow... */
2113         if (timeout >= LONG_MAX / HZ)
2114                 return -EINVAL;
2115
2116         timeout = timeout * HZ / 10000;
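	/*
	 * strict_strtoul_scaled(..., 4) parsed the value in units of
	 * 10^-4 seconds, so e.g. "5.25" became 52500 and the line above
	 * turns that into 5.25*HZ jiffies.
	 */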
2117
2118         if (timeout >= MAX_SCHEDULE_TIMEOUT)
2119                 timeout = MAX_SCHEDULE_TIMEOUT-1;
2120         if (timeout < 1)
2121                 timeout = 1;
2122         mddev->bitmap_info.daemon_sleep = timeout;
2123         if (mddev->thread) {
2124                 /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
2125                  * the bitmap is all clean and we don't need to
2126                  * adjust the timeout right now
2127                  */
2128                 if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
2129                         mddev->thread->timeout = timeout;
2130                         md_wakeup_thread(mddev->thread);
2131                 }
2132         }
2133         return len;
2134 }
2135
2136 static struct md_sysfs_entry bitmap_timeout =
2137 __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store);
2138
2139 static ssize_t
2140 backlog_show(struct mddev *mddev, char *page)
2141 {
2142         return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind);
2143 }
2144
2145 static ssize_t
2146 backlog_store(struct mddev *mddev, const char *buf, size_t len)
2147 {
2148         unsigned long backlog;
2149         int rv = strict_strtoul(buf, 10, &backlog);
2150         if (rv)
2151                 return rv;
2152         if (backlog > COUNTER_MAX)
2153                 return -EINVAL;
2154         mddev->bitmap_info.max_write_behind = backlog;
2155         return len;
2156 }
2157
2158 static struct md_sysfs_entry bitmap_backlog =
2159 __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store);
2160
2161 static ssize_t
2162 chunksize_show(struct mddev *mddev, char *page)
2163 {
2164         return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize);
2165 }
2166
2167 static ssize_t
2168 chunksize_store(struct mddev *mddev, const char *buf, size_t len)
2169 {
2170         /* Can only be changed when no bitmap is active */
2171         int rv;
2172         unsigned long csize;
2173         if (mddev->bitmap)
2174                 return -EBUSY;
2175         rv = strict_strtoul(buf, 10, &csize);
2176         if (rv)
2177                 return rv;
2178         if (csize < 512 ||
2179             !is_power_of_2(csize))
2180                 return -EINVAL;
2181         mddev->bitmap_info.chunksize = csize;
2182         return len;
2183 }
2184
2185 static struct md_sysfs_entry bitmap_chunksize =
2186 __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
2187
2188 static ssize_t metadata_show(struct mddev *mddev, char *page)
2189 {
2190         return sprintf(page, "%s\n", (mddev->bitmap_info.external
2191                                       ? "external" : "internal"));
2192 }
2193
2194 static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
2195 {
2196         if (mddev->bitmap ||
2197             mddev->bitmap_info.file ||
2198             mddev->bitmap_info.offset)
2199                 return -EBUSY;
2200         if (strncmp(buf, "external", 8) == 0)
2201                 mddev->bitmap_info.external = 1;
2202         else if (strncmp(buf, "internal", 8) == 0)
2203                 mddev->bitmap_info.external = 0;
2204         else
2205                 return -EINVAL;
2206         return len;
2207 }
2208
2209 static struct md_sysfs_entry bitmap_metadata =
2210 __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
2211
2212 static ssize_t can_clear_show(struct mddev *mddev, char *page)
2213 {
2214         int len;
2215         if (mddev->bitmap)
2216                 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ?
2217                                              "false" : "true"));
2218         else
2219                 len = sprintf(page, "\n");
2220         return len;
2221 }
2222
2223 static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len)
2224 {
2225         if (mddev->bitmap == NULL)
2226                 return -ENOENT;
2227         if (strncmp(buf, "false", 5) == 0)
2228                 mddev->bitmap->need_sync = 1;
2229         else if (strncmp(buf, "true", 4) == 0) {
2230                 if (mddev->degraded)
2231                         return -EBUSY;
2232                 mddev->bitmap->need_sync = 0;
2233         } else
2234                 return -EINVAL;
2235         return len;
2236 }
2237
2238 static struct md_sysfs_entry bitmap_can_clear =
2239 __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
2240
2241 static ssize_t
2242 behind_writes_used_show(struct mddev *mddev, char *page)
2243 {
2244         if (mddev->bitmap == NULL)
2245                 return sprintf(page, "0\n");
2246         return sprintf(page, "%lu\n",
2247                        mddev->bitmap->behind_writes_used);
2248 }
2249
2250 static ssize_t
2251 behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len)
2252 {
2253         if (mddev->bitmap)
2254                 mddev->bitmap->behind_writes_used = 0;
2255         return len;
2256 }
2257
2258 static struct md_sysfs_entry max_backlog_used =
2259 __ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
2260        behind_writes_used_show, behind_writes_used_reset);
2261
2262 static struct attribute *md_bitmap_attrs[] = {
2263         &bitmap_location.attr,
2264         &bitmap_space.attr,
2265         &bitmap_timeout.attr,
2266         &bitmap_backlog.attr,
2267         &bitmap_chunksize.attr,
2268         &bitmap_metadata.attr,
2269         &bitmap_can_clear.attr,
2270         &max_backlog_used.attr,
2271         NULL
2272 };
2273 struct attribute_group md_bitmap_group = {
2274         .name = "bitmap",
2275         .attrs = md_bitmap_attrs,
2276 };
2277