Btrfs, scrub: uninitialized variable in scrub_extent_for_parity()
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index efa0831..9e1569f 100644
@@ -63,10 +63,18 @@ struct scrub_ctx;
  */
 #define SCRUB_MAX_PAGES_PER_BLOCK      16      /* 64k per node/leaf/sector */
 
+struct scrub_recover {
+       atomic_t                refs;
+       struct btrfs_bio        *bbio;
+       u64                     *raid_map;
+       u64                     map_length;
+};
+
 struct scrub_page {
        struct scrub_block      *sblock;
        struct page             *page;
        struct btrfs_device     *dev;
+       struct list_head        list;
        u64                     flags;  /* extent flags */
        u64                     generation;
        u64                     logical;
@@ -79,6 +87,8 @@ struct scrub_page {
                unsigned int    io_error:1;
        };
        u8                      csum[BTRFS_CSUM_SIZE];
+
+       struct scrub_recover    *recover;
 };
 
 struct scrub_bio {
@@ -105,14 +115,52 @@ struct scrub_block {
        atomic_t                outstanding_pages;
        atomic_t                ref_count; /* free mem on transition to zero */
        struct scrub_ctx        *sctx;
+       struct scrub_parity     *sparity;
        struct {
                unsigned int    header_error:1;
                unsigned int    checksum_error:1;
                unsigned int    no_io_error_seen:1;
                unsigned int    generation_error:1; /* also sets header_error */
+
+               /* The following is for the data used to check parity; */
+               /* it only applies to data that has a checksum. */
+               unsigned int    data_corrected:1;
        };
 };
 
+/* Used for the chunks with parity stripes, such as RAID5/6 */
+struct scrub_parity {
+       struct scrub_ctx        *sctx;
+
+       struct btrfs_device     *scrub_dev;
+
+       u64                     logic_start;
+
+       u64                     logic_end;
+
+       int                     nsectors;
+
+       int                     stripe_len;
+
+       atomic_t                ref_count;
+
+       struct list_head        spages;
+
+       /* Work struct used for parity check and repair */
+       struct btrfs_work       work;
+
+       /* Mark the parity blocks which have data */
+       unsigned long           *dbitmap;
+
+       /*
+        * Mark the parity blocks which have data, but where an error
+        * occurred when reading or checking the data
+        */
+       unsigned long           *ebitmap;
+
+       unsigned long           bitmap[0];
+};
+
 struct scrub_wr_ctx {
        struct scrub_bio *wr_curr_bio;
        struct btrfs_device *tgtdev;
@@ -196,7 +244,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                                struct scrub_block *sblock, int is_metadata,
                                int have_csum, u8 *csum, u64 generation,
-                               u16 csum_size);
+                               u16 csum_size, int retry_failed_mirror);
 static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
                                         struct scrub_block *sblock,
                                         int is_metadata, int have_csum,
@@ -218,6 +266,8 @@ static void scrub_block_get(struct scrub_block *sblock);
 static void scrub_block_put(struct scrub_block *sblock);
 static void scrub_page_get(struct scrub_page *spage);
 static void scrub_page_put(struct scrub_page *spage);
+static void scrub_parity_get(struct scrub_parity *sparity);
+static void scrub_parity_put(struct scrub_parity *sparity);
 static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
                                    struct scrub_page *spage);
 static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -790,6 +840,20 @@ out:
        scrub_pending_trans_workers_dec(sctx);
 }
 
+static inline void scrub_get_recover(struct scrub_recover *recover)
+{
+       atomic_inc(&recover->refs);
+}
+
+static inline void scrub_put_recover(struct scrub_recover *recover)
+{
+       if (atomic_dec_and_test(&recover->refs)) {
+               kfree(recover->bbio);
+               kfree(recover->raid_map);
+               kfree(recover);
+       }
+}
+
 /*
  * scrub_handle_errored_block gets called when either verification of the
  * pages failed or the bio failed to read, e.g. with EIO. In the latter
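
scrub_get_recover() and scrub_put_recover() above are plain reference
counting: every scrub_page built from one btrfs_map_sblock() result shares a
single scrub_recover, so the bbio and raid_map stay alive until the last page
drops its reference (scrub_setup_recheck_block() drops its own creator
reference once the pages hold theirs). A minimal userspace sketch of the same
pattern, using C11 stdatomic in place of the kernel's atomic_t; all names
here are illustrative:

#include <stdatomic.h>
#include <stdlib.h>

struct recover_demo {
        atomic_int refs;
        void *payload;                  /* stands in for bbio/raid_map */
};

static struct recover_demo *recover_alloc(void)
{
        struct recover_demo *r = calloc(1, sizeof(*r));

        if (r)
                atomic_init(&r->refs, 1);       /* creator holds one ref */
        return r;
}

static void recover_get(struct recover_demo *r)
{
        atomic_fetch_add(&r->refs, 1);
}

static void recover_put(struct recover_demo *r)
{
        /* the last put frees, as in scrub_put_recover() */
        if (atomic_fetch_sub(&r->refs, 1) == 1) {
                free(r->payload);
                free(r);
        }
}
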
@@ -906,7 +970,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
 
        /* build and submit the bios for the failed mirror, check checksums */
        scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum,
-                           csum, generation, sctx->csum_size);
+                           csum, generation, sctx->csum_size, 1);
 
        if (!sblock_bad->header_error && !sblock_bad->checksum_error &&
            sblock_bad->no_io_error_seen) {
@@ -920,6 +984,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
                 */
                spin_lock(&sctx->stat_lock);
                sctx->stat.unverified_errors++;
+               sblock_to_check->data_corrected = 1;
                spin_unlock(&sctx->stat_lock);
 
                if (sctx->is_dev_replace)
@@ -1019,7 +1084,7 @@ nodatasum_case:
                /* build and submit the bios, check checksums */
                scrub_recheck_block(fs_info, sblock_other, is_metadata,
                                    have_csum, csum, generation,
-                                   sctx->csum_size);
+                                   sctx->csum_size, 0);
 
                if (!sblock_other->header_error &&
                    !sblock_other->checksum_error &&
@@ -1169,7 +1234,7 @@ nodatasum_case:
                         */
                        scrub_recheck_block(fs_info, sblock_bad,
                                            is_metadata, have_csum, csum,
-                                           generation, sctx->csum_size);
+                                           generation, sctx->csum_size, 1);
                        if (!sblock_bad->header_error &&
                            !sblock_bad->checksum_error &&
                            sblock_bad->no_io_error_seen)
@@ -1180,6 +1245,7 @@ nodatasum_case:
 corrected_error:
                        spin_lock(&sctx->stat_lock);
                        sctx->stat.corrected_errors++;
+                       sblock_to_check->data_corrected = 1;
                        spin_unlock(&sctx->stat_lock);
                        printk_ratelimited_in_rcu(KERN_ERR
                                "BTRFS: fixed up error at logical %llu on dev %s\n",
@@ -1201,11 +1267,18 @@ out:
                     mirror_index++) {
                        struct scrub_block *sblock = sblocks_for_recheck +
                                                     mirror_index;
+                       struct scrub_recover *recover;
                        int page_index;
 
                        for (page_index = 0; page_index < sblock->page_count;
                             page_index++) {
                                sblock->pagev[page_index]->sblock = NULL;
+                               recover = sblock->pagev[page_index]->recover;
+                               if (recover) {
+                                       scrub_put_recover(recover);
+                                       sblock->pagev[page_index]->recover =
+                                                                       NULL;
+                               }
                                scrub_page_put(sblock->pagev[page_index]);
                        }
                }
@@ -1215,14 +1288,63 @@ out:
        return 0;
 }
 
+static inline int scrub_nr_raid_mirrors(struct btrfs_bio *bbio, u64 *raid_map)
+{
+       if (raid_map) {
+               if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
+                       return 3;
+               else
+                       return 2;
+       } else {
+               return (int)bbio->num_stripes;
+       }
+}
+
+static inline void scrub_stripe_index_and_offset(u64 logical, u64 *raid_map,
+                                                u64 mapped_length,
+                                                int nstripes, int mirror,
+                                                int *stripe_index,
+                                                u64 *stripe_offset)
+{
+       int i;
+
+       if (raid_map) {
+               /* RAID5/6 */
+               for (i = 0; i < nstripes; i++) {
+                       if (raid_map[i] == RAID6_Q_STRIPE ||
+                           raid_map[i] == RAID5_P_STRIPE)
+                               continue;
+
+                       if (logical >= raid_map[i] &&
+                           logical < raid_map[i] + mapped_length)
+                               break;
+               }
+
+               *stripe_index = i;
+               *stripe_offset = logical - raid_map[i];
+       } else {
+               /* The other RAID types */
+               *stripe_index = mirror;
+               *stripe_offset = 0;
+       }
+}
+
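
scrub_stripe_index_and_offset() maps a logical address back to the stripe
that holds it. For RAID5/6 the raid_map produced by btrfs_map_sblock() lists
the logical start of each data stripe, with the P and Q slots tagged by
special markers; for the other profiles, mirror_num selects the copy
directly. This is also why scrub_nr_raid_mirrors() returns bbio->num_stripes
there but only 2 or 3 in the parity case: for RAID5/6 a "mirror" is in effect
a reconstruction alternative (direct read, rebuild via P, and for RAID6
rebuild via Q). A self-contained sketch of the RAID5 lookup, assuming a
hypothetical raid_map for a 3-disk full stripe with 64K stripe_len (demo
values only; DEMO_P_STRIPE mirrors the kernel's RAID5_P_STRIPE, (u64)-2):

#include <stdint.h>
#include <stdio.h>

#define DEMO_P_STRIPE ((uint64_t)-2)    /* like the kernel's RAID5_P_STRIPE */

int main(void)
{
        /* two data stripes at logical 0 and 64K, then the P slot */
        uint64_t raid_map[] = { 0, 65536, DEMO_P_STRIPE };
        uint64_t logical = 69632;       /* 64K + 4K into the full stripe */
        uint64_t mapped_length = 65536;
        int i;

        for (i = 0; i < 3; i++) {
                if (raid_map[i] == DEMO_P_STRIPE)
                        continue;
                if (logical >= raid_map[i] &&
                    logical < raid_map[i] + mapped_length)
                        break;
        }
        /* prints "stripe 1, offset 4096" */
        printf("stripe %d, offset %llu\n", i,
               (unsigned long long)(logical - raid_map[i]));
        return 0;
}
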
 static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
                                     struct btrfs_fs_info *fs_info,
                                     struct scrub_block *original_sblock,
                                     u64 length, u64 logical,
                                     struct scrub_block *sblocks_for_recheck)
 {
+       struct scrub_recover *recover;
+       struct btrfs_bio *bbio;
+       u64 *raid_map;
+       u64 sublen;
+       u64 mapped_length;
+       u64 stripe_offset;
+       int stripe_index;
        int page_index;
        int mirror_index;
+       int nmirrors;
        int ret;
 
        /*
@@ -1233,23 +1355,39 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx,
 
        page_index = 0;
        while (length > 0) {
-               u64 sublen = min_t(u64, length, PAGE_SIZE);
-               u64 mapped_length = sublen;
-               struct btrfs_bio *bbio = NULL;
+               sublen = min_t(u64, length, PAGE_SIZE);
+               mapped_length = sublen;
+               bbio = NULL;
+               raid_map = NULL;
 
                /*
                 * with a length of PAGE_SIZE, each returned stripe
                 * represents one mirror
                 */
-               ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical,
-                                     &mapped_length, &bbio, 0);
+               ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical,
+                                      &mapped_length, &bbio, 0, &raid_map);
                if (ret || !bbio || mapped_length < sublen) {
                        kfree(bbio);
+                       kfree(raid_map);
                        return -EIO;
                }
 
+               recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS);
+               if (!recover) {
+                       kfree(bbio);
+                       kfree(raid_map);
+                       return -ENOMEM;
+               }
+
+               atomic_set(&recover->refs, 1);
+               recover->bbio = bbio;
+               recover->raid_map = raid_map;
+               recover->map_length = mapped_length;
+
                BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO);
-               for (mirror_index = 0; mirror_index < (int)bbio->num_stripes;
+
+               nmirrors = scrub_nr_raid_mirrors(bbio, raid_map);
+               for (mirror_index = 0; mirror_index < nmirrors;
                     mirror_index++) {
                        struct scrub_block *sblock;
                        struct scrub_page *page;
@@ -1265,26 +1403,38 @@ leave_nomem:
                                spin_lock(&sctx->stat_lock);
                                sctx->stat.malloc_errors++;
                                spin_unlock(&sctx->stat_lock);
-                               kfree(bbio);
+                               scrub_put_recover(recover);
                                return -ENOMEM;
                        }
                        scrub_page_get(page);
                        sblock->pagev[page_index] = page;
                        page->logical = logical;
-                       page->physical = bbio->stripes[mirror_index].physical;
+
+                       scrub_stripe_index_and_offset(logical, raid_map,
+                                                     mapped_length,
+                                                     bbio->num_stripes,
+                                                     mirror_index,
+                                                     &stripe_index,
+                                                     &stripe_offset);
+                       page->physical = bbio->stripes[stripe_index].physical +
+                                        stripe_offset;
+                       page->dev = bbio->stripes[stripe_index].dev;
+
                        BUG_ON(page_index >= original_sblock->page_count);
                        page->physical_for_dev_replace =
                                original_sblock->pagev[page_index]->
                                physical_for_dev_replace;
                        /* for missing devices, dev->bdev is NULL */
-                       page->dev = bbio->stripes[mirror_index].dev;
                        page->mirror_num = mirror_index + 1;
                        sblock->page_count++;
                        page->page = alloc_page(GFP_NOFS);
                        if (!page->page)
                                goto leave_nomem;
+
+                       scrub_get_recover(recover);
+                       page->recover = recover;
                }
-               kfree(bbio);
+               scrub_put_recover(recover);
                length -= sublen;
                logical += sublen;
                page_index++;
@@ -1293,6 +1443,51 @@ leave_nomem:
        return 0;
 }
 
+struct scrub_bio_ret {
+       struct completion event;
+       int error;
+};
+
+static void scrub_bio_wait_endio(struct bio *bio, int error)
+{
+       struct scrub_bio_ret *ret = bio->bi_private;
+
+       ret->error = error;
+       complete(&ret->event);
+}
+
+static inline int scrub_is_page_on_raid56(struct scrub_page *page)
+{
+       return page->recover && page->recover->raid_map;
+}
+
+static int scrub_submit_raid56_bio_wait(struct btrfs_fs_info *fs_info,
+                                       struct bio *bio,
+                                       struct scrub_page *page)
+{
+       struct scrub_bio_ret done;
+       int ret;
+
+       init_completion(&done.event);
+       done.error = 0;
+       bio->bi_iter.bi_sector = page->logical >> 9;
+       bio->bi_private = &done;
+       bio->bi_end_io = scrub_bio_wait_endio;
+
+       ret = raid56_parity_recover(fs_info->fs_root, bio, page->recover->bbio,
+                                   page->recover->raid_map,
+                                   page->recover->map_length,
+                                   page->mirror_num, 0);
+       if (ret)
+               return ret;
+
+       wait_for_completion(&done.event);
+       if (done.error)
+               return -EIO;
+
+       return 0;
+}
+
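
scrub_submit_raid56_bio_wait() turns the asynchronous raid56 recovery read
into a synchronous one: the endio callback records the error and signals a
completion that the submitter sleeps on. The same shape in portable C, with a
POSIX semaphore standing in for struct completion and a worker thread
standing in for the bio completion context (illustrative only):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

struct demo_bio_ret {
        sem_t event;                    /* stands in for struct completion */
        int error;
};

/* endio analogue: runs asynchronously, like scrub_bio_wait_endio() */
static void *demo_endio(void *priv)
{
        struct demo_bio_ret *ret = priv;

        ret->error = 0;                 /* the "I/O" succeeded */
        sem_post(&ret->event);          /* complete(&done.event) analogue */
        return NULL;
}

int main(void)
{
        struct demo_bio_ret done;
        pthread_t io;

        sem_init(&done.event, 0, 0);
        done.error = 0;
        pthread_create(&io, NULL, demo_endio, &done);   /* "submit" */
        sem_wait(&done.event);          /* wait_for_completion() analogue */
        pthread_join(io, NULL);
        printf("io error: %d\n", done.error);
        return 0;
}

Note also that this path sets bi_sector from the page's logical address
rather than its physical one (compare the direct-read branch in
scrub_recheck_block() below): the raid56 layer does the logical-to-physical
mapping itself while reconstructing.
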
 /*
  * this function will check the on disk data for checksum errors, header
  * errors and read I/O errors. If any I/O errors happen, the exact pages
@@ -1303,7 +1498,7 @@ leave_nomem:
 static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                                struct scrub_block *sblock, int is_metadata,
                                int have_csum, u8 *csum, u64 generation,
-                               u16 csum_size)
+                               u16 csum_size, int retry_failed_mirror)
 {
        int page_num;
 
@@ -1329,11 +1524,17 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
                        continue;
                }
                bio->bi_bdev = page->dev->bdev;
-               bio->bi_iter.bi_sector = page->physical >> 9;
 
                bio_add_page(bio, page->page, PAGE_SIZE, 0);
-               if (btrfsic_submit_bio_wait(READ, bio))
-                       sblock->no_io_error_seen = 0;
+               if (!retry_failed_mirror && scrub_is_page_on_raid56(page)) {
+                       if (scrub_submit_raid56_bio_wait(fs_info, bio, page))
+                               sblock->no_io_error_seen = 0;
+               } else {
+                       bio->bi_iter.bi_sector = page->physical >> 9;
+
+                       if (btrfsic_submit_bio_wait(READ, bio))
+                               sblock->no_io_error_seen = 0;
+               }
 
                bio_put(bio);
        }
@@ -1486,6 +1687,13 @@ static void scrub_write_block_to_dev_replace(struct scrub_block *sblock)
 {
        int page_num;
 
+       /*
+        * This block is used to check the parity on the source device,
+        * so the data need not be written to the destination device.
+        */
+       if (sblock->sparity)
+               return;
+
        for (page_num = 0; page_num < sblock->page_count; page_num++) {
                int ret;
 
@@ -1867,6 +2075,9 @@ static void scrub_block_put(struct scrub_block *sblock)
        if (atomic_dec_and_test(&sblock->ref_count)) {
                int i;
 
+               if (sblock->sparity)
+                       scrub_parity_put(sblock->sparity);
+
                for (i = 0; i < sblock->page_count; i++)
                        scrub_page_put(sblock->pagev[i]);
                kfree(sblock);
@@ -2124,9 +2335,51 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
        scrub_pending_bio_dec(sctx);
 }
 
+static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
+                                      unsigned long *bitmap,
+                                      u64 start, u64 len)
+{
+       int offset;
+       int nsectors;
+       int sectorsize = sparity->sctx->dev_root->sectorsize;
+
+       if (len >= sparity->stripe_len) {
+               bitmap_set(bitmap, 0, sparity->nsectors);
+               return;
+       }
+
+       start -= sparity->logic_start;
+       offset = (int)do_div(start, sparity->stripe_len);
+       offset /= sectorsize;
+       nsectors = (int)len / sectorsize;
+
+       if (offset + nsectors <= sparity->nsectors) {
+               bitmap_set(bitmap, offset, nsectors);
+               return;
+       }
+
+       bitmap_set(bitmap, offset, sparity->nsectors - offset);
+       bitmap_set(bitmap, 0, nsectors - (sparity->nsectors - offset));
+}
+
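
__scrub_mark_bitmap() treats the per-stripe bitmap as circular: start is
reduced to a sector offset inside the stripe, and a range that runs past the
last sector wraps around to bit 0. A worked example, assuming a 64K
stripe_len and 4K sectors (so nsectors = 16; demo values): marking 4 sectors
starting at sector 14 sets bits 14-15 and then bits 0-1.

#include <stdio.h>

#define NSECTORS 16     /* 64K stripe / 4K sectors, demo values */

static void demo_mark(unsigned int *bitmap, int offset, int nsectors)
{
        int i;

        if (offset + nsectors <= NSECTORS) {
                for (i = 0; i < nsectors; i++)
                        *bitmap |= 1u << (offset + i);
                return;
        }
        /* wrap around, as __scrub_mark_bitmap() does */
        for (i = offset; i < NSECTORS; i++)
                *bitmap |= 1u << i;
        for (i = 0; i < nsectors - (NSECTORS - offset); i++)
                *bitmap |= 1u << i;
}

int main(void)
{
        unsigned int bitmap = 0;

        demo_mark(&bitmap, 14, 4);
        printf("bitmap: 0x%04x\n", bitmap);     /* prints 0xc003 */
        return 0;
}
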
+static inline void scrub_parity_mark_sectors_error(struct scrub_parity *sparity,
+                                                  u64 start, u64 len)
+{
+       __scrub_mark_bitmap(sparity, sparity->ebitmap, start, len);
+}
+
+static inline void scrub_parity_mark_sectors_data(struct scrub_parity *sparity,
+                                                 u64 start, u64 len)
+{
+       __scrub_mark_bitmap(sparity, sparity->dbitmap, start, len);
+}
+
 static void scrub_block_complete(struct scrub_block *sblock)
 {
+       int corrupted = 0;
+
        if (!sblock->no_io_error_seen) {
+               corrupted = 1;
                scrub_handle_errored_block(sblock);
        } else {
                /*
@@ -2134,9 +2387,19 @@ static void scrub_block_complete(struct scrub_block *sblock)
                 * dev replace case, otherwise write here in dev replace
                 * case.
                 */
-               if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace)
+               corrupted = scrub_checksum(sblock);
+               if (!corrupted && sblock->sctx->is_dev_replace)
                        scrub_write_block_to_dev_replace(sblock);
        }
+
+       if (sblock->sparity && corrupted && !sblock->data_corrected) {
+               u64 start = sblock->pagev[0]->logical;
+               u64 end = sblock->pagev[sblock->page_count - 1]->logical +
+                         PAGE_SIZE;
+
+               scrub_parity_mark_sectors_error(sblock->sparity,
+                                               start, end - start);
+       }
 }
 
 static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len,
@@ -2228,6 +2491,132 @@ behind_scrub_pages:
        return 0;
 }
 
+static int scrub_pages_for_parity(struct scrub_parity *sparity,
+                                 u64 logical, u64 len,
+                                 u64 physical, struct btrfs_device *dev,
+                                 u64 flags, u64 gen, int mirror_num, u8 *csum)
+{
+       struct scrub_ctx *sctx = sparity->sctx;
+       struct scrub_block *sblock;
+       int index;
+
+       sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+       if (!sblock) {
+               spin_lock(&sctx->stat_lock);
+               sctx->stat.malloc_errors++;
+               spin_unlock(&sctx->stat_lock);
+               return -ENOMEM;
+       }
+
+       /* one ref inside this function, plus one for each page added to
+        * a bio later on */
+       atomic_set(&sblock->ref_count, 1);
+       sblock->sctx = sctx;
+       sblock->no_io_error_seen = 1;
+       sblock->sparity = sparity;
+       scrub_parity_get(sparity);
+
+       for (index = 0; len > 0; index++) {
+               struct scrub_page *spage;
+               u64 l = min_t(u64, len, PAGE_SIZE);
+
+               spage = kzalloc(sizeof(*spage), GFP_NOFS);
+               if (!spage) {
+leave_nomem:
+                       spin_lock(&sctx->stat_lock);
+                       sctx->stat.malloc_errors++;
+                       spin_unlock(&sctx->stat_lock);
+                       scrub_block_put(sblock);
+                       return -ENOMEM;
+               }
+               BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+               /* For scrub block */
+               scrub_page_get(spage);
+               sblock->pagev[index] = spage;
+               /* For scrub parity */
+               scrub_page_get(spage);
+               list_add_tail(&spage->list, &sparity->spages);
+               spage->sblock = sblock;
+               spage->dev = dev;
+               spage->flags = flags;
+               spage->generation = gen;
+               spage->logical = logical;
+               spage->physical = physical;
+               spage->mirror_num = mirror_num;
+               if (csum) {
+                       spage->have_csum = 1;
+                       memcpy(spage->csum, csum, sctx->csum_size);
+               } else {
+                       spage->have_csum = 0;
+               }
+               sblock->page_count++;
+               spage->page = alloc_page(GFP_NOFS);
+               if (!spage->page)
+                       goto leave_nomem;
+               len -= l;
+               logical += l;
+               physical += l;
+       }
+
+       WARN_ON(sblock->page_count == 0);
+       for (index = 0; index < sblock->page_count; index++) {
+               struct scrub_page *spage = sblock->pagev[index];
+               int ret;
+
+               ret = scrub_add_page_to_rd_bio(sctx, spage);
+               if (ret) {
+                       scrub_block_put(sblock);
+                       return ret;
+               }
+       }
+
+       /* last one frees, either here or in bio completion for last page */
+       scrub_block_put(sblock);
+       return 0;
+}
+
+static int scrub_extent_for_parity(struct scrub_parity *sparity,
+                                  u64 logical, u64 len,
+                                  u64 physical, struct btrfs_device *dev,
+                                  u64 flags, u64 gen, int mirror_num)
+{
+       struct scrub_ctx *sctx = sparity->sctx;
+       int ret;
+       u8 csum[BTRFS_CSUM_SIZE];
+       u32 blocksize;
+
+       if (flags & BTRFS_EXTENT_FLAG_DATA) {
+               blocksize = sctx->sectorsize;
+       } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+               blocksize = sctx->nodesize;
+       } else {
+               blocksize = sctx->sectorsize;
+               WARN_ON(1);
+       }
+
+       while (len) {
+               u64 l = min_t(u64, len, blocksize);
+               int have_csum = 0;
+
+               if (flags & BTRFS_EXTENT_FLAG_DATA) {
+                       /* push csums to sbio */
+                       have_csum = scrub_find_csum(sctx, logical, l, csum);
+                       if (have_csum == 0)
+                               goto skip;
+               }
+               ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
+                                            flags, gen, mirror_num,
+                                            have_csum ? csum : NULL);
+               if (ret)
+                       return ret;
+skip:
+               len -= l;
+               logical += l;
+               physical += l;
+       }
+       return 0;
+}
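
This hunk also carries the fix named in the commit subject: have_csum is
initialized to 0 at its declaration. The subject and that initializer suggest
the original submission declared the variable bare; in that case the
tree-block path, which never calls scrub_find_csum(), would evaluate
"have_csum ? csum : NULL" on an indeterminate value and could hand a garbage
csum pointer to scrub_pages_for_parity(). A minimal userspace illustration of
the hazard (hypothetical code, not from the kernel):

#include <stdio.h>

static int use_csum(const unsigned char *csum)
{
        return csum != NULL;
}

int main(void)
{
        unsigned char csum[32];
        int have_csum = 0;      /* without "= 0" this is indeterminate */

        /* metadata path: nothing ever assigns have_csum, so only the
         * initializer keeps the ternary below well-defined */
        printf("have csum: %d\n", use_csum(have_csum ? csum : NULL));
        return 0;
}
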
+
 /*
  * Given a physical address, this will calculate its
  * logical offset. If this is a parity stripe, it will return
@@ -2236,7 +2625,8 @@ behind_scrub_pages:
  * return 0 if it is a data stripe, 1 means parity stripe.
  */
 static int get_raid56_logic_offset(u64 physical, int num,
-                                  struct map_lookup *map, u64 *offset)
+                                  struct map_lookup *map, u64 *offset,
+                                  u64 *stripe_start)
 {
        int i;
        int j = 0;
@@ -2247,6 +2637,9 @@ static int get_raid56_logic_offset(u64 physical, int num,
 
        last_offset = (physical - map->stripes[num].physical) *
                      nr_data_stripes(map);
+       if (stripe_start)
+               *stripe_start = last_offset;
+
        *offset = last_offset;
        for (i = 0; i < nr_data_stripes(map); i++) {
                *offset = last_offset + i * map->stripe_len;
@@ -2269,13 +2662,330 @@ static int get_raid56_logic_offset(u64 physical, int num,
        return 1;
 }
 
+static void scrub_free_parity(struct scrub_parity *sparity)
+{
+       struct scrub_ctx *sctx = sparity->sctx;
+       struct scrub_page *curr, *next;
+       int nbits;
+
+       nbits = bitmap_weight(sparity->ebitmap, sparity->nsectors);
+       if (nbits) {
+               spin_lock(&sctx->stat_lock);
+               sctx->stat.read_errors += nbits;
+               sctx->stat.uncorrectable_errors += nbits;
+               spin_unlock(&sctx->stat_lock);
+       }
+
+       list_for_each_entry_safe(curr, next, &sparity->spages, list) {
+               list_del_init(&curr->list);
+               scrub_page_put(curr);
+       }
+
+       kfree(sparity);
+}
+
+static void scrub_parity_bio_endio(struct bio *bio, int error)
+{
+       struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
+       struct scrub_ctx *sctx = sparity->sctx;
+
+       if (error)
+               bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+                         sparity->nsectors);
+
+       scrub_free_parity(sparity);
+       scrub_pending_bio_dec(sctx);
+       bio_put(bio);
+}
+
+static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
+{
+       struct scrub_ctx *sctx = sparity->sctx;
+       struct bio *bio;
+       struct btrfs_raid_bio *rbio;
+       struct scrub_page *spage;
+       struct btrfs_bio *bbio = NULL;
+       u64 *raid_map = NULL;
+       u64 length;
+       int ret;
+
+       if (!bitmap_andnot(sparity->dbitmap, sparity->dbitmap, sparity->ebitmap,
+                          sparity->nsectors))
+               goto out;
+
+       length = sparity->logic_end - sparity->logic_start + 1;
+       ret = btrfs_map_sblock(sctx->dev_root->fs_info, WRITE,
+                              sparity->logic_start,
+                              &length, &bbio, 0, &raid_map);
+       if (ret || !bbio || !raid_map)
+               goto bbio_out;
+
+       bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
+       if (!bio)
+               goto bbio_out;
+
+       bio->bi_iter.bi_sector = sparity->logic_start >> 9;
+       bio->bi_private = sparity;
+       bio->bi_end_io = scrub_parity_bio_endio;
+
+       rbio = raid56_parity_alloc_scrub_rbio(sctx->dev_root, bio, bbio,
+                                             raid_map, length,
+                                             sparity->scrub_dev,
+                                             sparity->dbitmap,
+                                             sparity->nsectors);
+       if (!rbio)
+               goto rbio_out;
+
+       list_for_each_entry(spage, &sparity->spages, list)
+               raid56_parity_add_scrub_pages(rbio, spage->page,
+                                             spage->logical);
+
+       scrub_pending_bio_inc(sctx);
+       raid56_parity_submit_scrub_rbio(rbio);
+       return;
+
+rbio_out:
+       bio_put(bio);
+bbio_out:
+       kfree(bbio);
+       kfree(raid_map);
+       bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
+                 sparity->nsectors);
+       spin_lock(&sctx->stat_lock);
+       sctx->stat.malloc_errors++;
+       spin_unlock(&sctx->stat_lock);
+out:
+       scrub_free_parity(sparity);
+}
+
+static inline int scrub_calc_parity_bitmap_len(int nsectors)
+{
+       return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
+}
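
scrub_calc_parity_bitmap_len() rounds the sector count up to whole longs and
returns a size in bytes; scrub_raid56_parity() below then allocates
2 * bitmap_len after struct scrub_parity and points dbitmap and ebitmap into
that tail. For example, with 64K stripes and 4K sectors nsectors is 16, so on
a 64-bit machine each bitmap takes DIV_ROUND_UP(16, 64) * 8 = 8 bytes. A
quick check of the arithmetic:

#include <stdio.h>

#define BITS_PER_LONG           (8 * (int)sizeof(long))
#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

static int bitmap_len(int nsectors)
{
        return DIV_ROUND_UP(nsectors, BITS_PER_LONG) * (BITS_PER_LONG / 8);
}

int main(void)
{
        /* 64K stripe_len / 4K sectorsize = 16 sectors, demo values */
        printf("%d\n", bitmap_len(16)); /* 8 on LP64, 4 on 32-bit */
        return 0;
}
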
+
+static void scrub_parity_get(struct scrub_parity *sparity)
+{
+       atomic_inc(&sparity->ref_count);
+}
+
+static void scrub_parity_put(struct scrub_parity *sparity)
+{
+       if (!atomic_dec_and_test(&sparity->ref_count))
+               return;
+
+       scrub_parity_check_and_repair(sparity);
+}
+
+static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
+                                                 struct map_lookup *map,
+                                                 struct btrfs_device *sdev,
+                                                 struct btrfs_path *path,
+                                                 u64 logic_start,
+                                                 u64 logic_end)
+{
+       struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
+       struct btrfs_root *root = fs_info->extent_root;
+       struct btrfs_root *csum_root = fs_info->csum_root;
+       struct btrfs_extent_item *extent;
+       u64 flags;
+       int ret;
+       int slot;
+       struct extent_buffer *l;
+       struct btrfs_key key;
+       u64 generation;
+       u64 extent_logical;
+       u64 extent_physical;
+       u64 extent_len;
+       struct btrfs_device *extent_dev;
+       struct scrub_parity *sparity;
+       int nsectors;
+       int bitmap_len;
+       int extent_mirror_num;
+       int stop_loop = 0;
+
+       nsectors = map->stripe_len / root->sectorsize;
+       bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
+       sparity = kzalloc(sizeof(struct scrub_parity) + 2 * bitmap_len,
+                         GFP_NOFS);
+       if (!sparity) {
+               spin_lock(&sctx->stat_lock);
+               sctx->stat.malloc_errors++;
+               spin_unlock(&sctx->stat_lock);
+               return -ENOMEM;
+       }
+
+       sparity->stripe_len = map->stripe_len;
+       sparity->nsectors = nsectors;
+       sparity->sctx = sctx;
+       sparity->scrub_dev = sdev;
+       sparity->logic_start = logic_start;
+       sparity->logic_end = logic_end;
+       atomic_set(&sparity->ref_count, 1);
+       INIT_LIST_HEAD(&sparity->spages);
+       sparity->dbitmap = sparity->bitmap;
+       sparity->ebitmap = (void *)sparity->bitmap + bitmap_len;
+
+       ret = 0;
+       while (logic_start < logic_end) {
+               if (btrfs_fs_incompat(fs_info, SKINNY_METADATA))
+                       key.type = BTRFS_METADATA_ITEM_KEY;
+               else
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
+               key.objectid = logic_start;
+               key.offset = (u64)-1;
+
+               ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+               if (ret < 0)
+                       goto out;
+
+               if (ret > 0) {
+                       ret = btrfs_previous_extent_item(root, path, 0);
+                       if (ret < 0)
+                               goto out;
+                       if (ret > 0) {
+                               btrfs_release_path(path);
+                               ret = btrfs_search_slot(NULL, root, &key,
+                                                       path, 0, 0);
+                               if (ret < 0)
+                                       goto out;
+                       }
+               }
+
+               stop_loop = 0;
+               while (1) {
+                       u64 bytes;
+
+                       l = path->nodes[0];
+                       slot = path->slots[0];
+                       if (slot >= btrfs_header_nritems(l)) {
+                               ret = btrfs_next_leaf(root, path);
+                               if (ret == 0)
+                                       continue;
+                               if (ret < 0)
+                                       goto out;
+
+                               stop_loop = 1;
+                               break;
+                       }
+                       btrfs_item_key_to_cpu(l, &key, slot);
+
+                       if (key.type == BTRFS_METADATA_ITEM_KEY)
+                               bytes = root->nodesize;
+                       else
+                               bytes = key.offset;
+
+                       if (key.objectid + bytes <= logic_start)
+                               goto next;
+
+                       if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+                           key.type != BTRFS_METADATA_ITEM_KEY)
+                               goto next;
+
+                       if (key.objectid > logic_end) {
+                               stop_loop = 1;
+                               break;
+                       }
+
+                       while (key.objectid >= logic_start + map->stripe_len)
+                               logic_start += map->stripe_len;
+
+                       extent = btrfs_item_ptr(l, slot,
+                                               struct btrfs_extent_item);
+                       flags = btrfs_extent_flags(l, extent);
+                       generation = btrfs_extent_generation(l, extent);
+
+                       if (key.objectid < logic_start &&
+                           (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
+                               btrfs_err(fs_info,
+                                         "scrub: tree block %llu spanning stripes, ignored. logical=%llu",
+                                          key.objectid, logic_start);
+                               goto next;
+                       }
+again:
+                       extent_logical = key.objectid;
+                       extent_len = bytes;
+
+                       if (extent_logical < logic_start) {
+                               extent_len -= logic_start - extent_logical;
+                               extent_logical = logic_start;
+                       }
+
+                       if (extent_logical + extent_len >
+                           logic_start + map->stripe_len)
+                               extent_len = logic_start + map->stripe_len -
+                                            extent_logical;
+
+                       scrub_parity_mark_sectors_data(sparity, extent_logical,
+                                                      extent_len);
+
+                       scrub_remap_extent(fs_info, extent_logical,
+                                          extent_len, &extent_physical,
+                                          &extent_dev,
+                                          &extent_mirror_num);
+
+                       ret = btrfs_lookup_csums_range(csum_root,
+                                               extent_logical,
+                                               extent_logical + extent_len - 1,
+                                               &sctx->csum_list, 1);
+                       if (ret)
+                               goto out;
+
+                       ret = scrub_extent_for_parity(sparity, extent_logical,
+                                                     extent_len,
+                                                     extent_physical,
+                                                     extent_dev, flags,
+                                                     generation,
+                                                     extent_mirror_num);
+                       if (ret)
+                               goto out;
+
+                       scrub_free_csums(sctx);
+                       if (extent_logical + extent_len <
+                           key.objectid + bytes) {
+                               logic_start += map->stripe_len;
+
+                               if (logic_start >= logic_end) {
+                                       stop_loop = 1;
+                                       break;
+                               }
+
+                               if (logic_start < key.objectid + bytes) {
+                                       cond_resched();
+                                       goto again;
+                               }
+                       }
+next:
+                       path->slots[0]++;
+               }
+
+               btrfs_release_path(path);
+
+               if (stop_loop)
+                       break;
+
+               logic_start += map->stripe_len;
+       }
+out:
+       if (ret < 0)
+               scrub_parity_mark_sectors_error(sparity, logic_start,
+                                               logic_end - logic_start + 1);
+       scrub_parity_put(sparity);
+       scrub_submit(sctx);
+       mutex_lock(&sctx->wr_ctx.wr_lock);
+       scrub_wr_submit(sctx);
+       mutex_unlock(&sctx->wr_ctx.wr_lock);
+
+       btrfs_release_path(path);
+       return ret < 0 ? ret : 0;
+}
+
 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                                           struct map_lookup *map,
                                           struct btrfs_device *scrub_dev,
                                           int num, u64 base, u64 length,
                                           int is_dev_replace)
 {
-       struct btrfs_path *path;
+       struct btrfs_path *path, *ppath;
        struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
        struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_root *csum_root = fs_info->csum_root;
@@ -2302,6 +3012,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        u64 extent_logical;
        u64 extent_physical;
        u64 extent_len;
+       u64 stripe_logical;
+       u64 stripe_end;
        struct btrfs_device *extent_dev;
        int extent_mirror_num;
        int stop_loop = 0;
@@ -2327,7 +3039,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                mirror_num = num % map->num_stripes + 1;
        } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
                                BTRFS_BLOCK_GROUP_RAID6)) {
-               get_raid56_logic_offset(physical, num, map, &offset);
+               get_raid56_logic_offset(physical, num, map, &offset, NULL);
                increment = map->stripe_len * nr_data_stripes(map);
                mirror_num = 1;
        } else {
@@ -2339,6 +3051,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        if (!path)
                return -ENOMEM;
 
+       ppath = btrfs_alloc_path();
+       if (!ppath) {
+               btrfs_free_path(path);
+               return -ENOMEM;
+       }
+
        /*
         * work on commit root. The related disk blocks are static as
         * long as COW is applied. This means, it is save to rewrite
@@ -2357,7 +3075,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
                         BTRFS_BLOCK_GROUP_RAID6)) {
                get_raid56_logic_offset(physical_end, num,
-                                       map, &logic_end);
+                                       map, &logic_end, NULL);
                logic_end += base;
        } else {
                logic_end = logical + increment * nstripes;
@@ -2404,10 +3122,18 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
                                BTRFS_BLOCK_GROUP_RAID6)) {
                        ret = get_raid56_logic_offset(physical, num,
-                                       map, &logical);
+                                       map, &logical, &stripe_logical);
                        logical += base;
-                       if (ret)
+                       if (ret) {
+                               stripe_logical += base;
+                               stripe_end = stripe_logical + increment - 1;
+                               ret = scrub_raid56_parity(sctx, map, scrub_dev,
+                                               ppath, stripe_logical,
+                                               stripe_end);
+                               if (ret)
+                                       goto out;
                                goto skip;
+                       }
                }
                /*
                 * canceled?
@@ -2558,13 +3284,25 @@ again:
                                         * loop until we find next data stripe
                                         * or we have finished all stripes.
                                         */
-                                       do {
-                                               physical += map->stripe_len;
-                                               ret = get_raid56_logic_offset(
-                                                               physical, num,
-                                                               map, &logical);
-                                               logical += base;
-                                       } while (physical < physical_end && ret);
+loop:
+                                       physical += map->stripe_len;
+                                       ret = get_raid56_logic_offset(physical,
+                                                       num, map, &logical,
+                                                       &stripe_logical);
+                                       logical += base;
+
+                                       if (ret && physical < physical_end) {
+                                               stripe_logical += base;
+                                               stripe_end = stripe_logical +
+                                                               increment - 1;
+                                               ret = scrub_raid56_parity(sctx,
+                                                       map, scrub_dev, ppath,
+                                                       stripe_logical,
+                                                       stripe_end);
+                                               if (ret)
+                                                       goto out;
+                                               goto loop;
+                                       }
                                } else {
                                        physical += map->stripe_len;
                                        logical += increment;
@@ -2605,6 +3343,7 @@ out:
 
        blk_finish_plug(&plug);
        btrfs_free_path(path);
+       btrfs_free_path(ppath);
        return ret < 0 ? ret : 0;
 }
 
@@ -3310,6 +4049,50 @@ out:
        scrub_pending_trans_workers_dec(sctx);
 }
 
+static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
+                                u64 logical)
+{
+       struct extent_state *cached_state = NULL;
+       struct btrfs_ordered_extent *ordered;
+       struct extent_io_tree *io_tree;
+       struct extent_map *em;
+       u64 lockstart = start, lockend = start + len - 1;
+       int ret = 0;
+
+       io_tree = &BTRFS_I(inode)->io_tree;
+
+       lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+       ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
+       if (ordered) {
+               btrfs_put_ordered_extent(ordered);
+               ret = 1;
+               goto out_unlock;
+       }
+
+       em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
+       if (IS_ERR(em)) {
+               ret = PTR_ERR(em);
+               goto out_unlock;
+       }
+
+       /*
+        * This extent does not actually cover the logical extent anymore,
+        * move on to the next inode.
+        */
+       if (em->block_start > logical ||
+           em->block_start + em->block_len < logical + len) {
+               free_extent_map(em);
+               ret = 1;
+               goto out_unlock;
+       }
+       free_extent_map(em);
+
+out_unlock:
+       unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
+                            GFP_NOFS);
+       return ret;
+}
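
check_extent_to_block() returns a tri-state: 1 when the range is busy (a
pending ordered extent) or the extent no longer covers the logical range, a
negative errno on failure, and 0 when the copy may proceed. Both call sites
below fold the skip case back into success with "ret = ret > 0 ? 0 : ret;".
A compact illustration of that folding (hypothetical helper name):

#include <stdio.h>

/* >0 means "skip, not an error", <0 is an errno, 0 means proceed */
static int fold_check_ret(int ret)
{
        return ret > 0 ? 0 : ret;
}

int main(void)
{
        /* prints "0 0 -5" */
        printf("%d %d %d\n", fold_check_ret(1), fold_check_ret(0),
               fold_check_ret(-5));
        return 0;
}
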
+
 static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
                                      struct scrub_copy_nocow_ctx *nocow_ctx)
 {
@@ -3318,13 +4101,10 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
        struct inode *inode;
        struct page *page;
        struct btrfs_root *local_root;
-       struct btrfs_ordered_extent *ordered;
-       struct extent_map *em;
-       struct extent_state *cached_state = NULL;
        struct extent_io_tree *io_tree;
        u64 physical_for_dev_replace;
+       u64 nocow_ctx_logical;
        u64 len = nocow_ctx->len;
-       u64 lockstart = offset, lockend = offset + len - 1;
        unsigned long index;
        int srcu_index;
        int ret = 0;
@@ -3356,30 +4136,13 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
 
        physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
        io_tree = &BTRFS_I(inode)->io_tree;
+       nocow_ctx_logical = nocow_ctx->logical;
 
-       lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
-       ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
-       if (ordered) {
-               btrfs_put_ordered_extent(ordered);
-               goto out_unlock;
-       }
-
-       em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
-       if (IS_ERR(em)) {
-               ret = PTR_ERR(em);
-               goto out_unlock;
-       }
-
-       /*
-        * This extent does not actually cover the logical extent anymore,
-        * move on to the next inode.
-        */
-       if (em->block_start > nocow_ctx->logical ||
-           em->block_start + em->block_len < nocow_ctx->logical + len) {
-               free_extent_map(em);
-               goto out_unlock;
+       ret = check_extent_to_block(inode, offset, len, nocow_ctx_logical);
+       if (ret) {
+               ret = ret > 0 ? 0 : ret;
+               goto out;
        }
-       free_extent_map(em);
 
        while (len >= PAGE_CACHE_SIZE) {
                index = offset >> PAGE_CACHE_SHIFT;
@@ -3396,7 +4159,7 @@ again:
                                goto next_page;
                } else {
                        ClearPageError(page);
-                       err = extent_read_full_page_nolock(io_tree, page,
+                       err = extent_read_full_page(io_tree, page,
                                                           btrfs_get_extent,
                                                           nocow_ctx->mirror_num);
                        if (err) {
@@ -3421,6 +4184,14 @@ again:
                                goto next_page;
                        }
                }
+
+               ret = check_extent_to_block(inode, offset, len,
+                                           nocow_ctx_logical);
+               if (ret) {
+                       ret = ret > 0 ? 0 : ret;
+                       goto next_page;
+               }
+
                err = write_page_nocow(nocow_ctx->sctx,
                                       physical_for_dev_replace, page);
                if (err)
@@ -3434,12 +4205,10 @@ next_page:
 
                offset += PAGE_CACHE_SIZE;
                physical_for_dev_replace += PAGE_CACHE_SIZE;
+               nocow_ctx_logical += PAGE_CACHE_SIZE;
                len -= PAGE_CACHE_SIZE;
        }
        ret = COPY_COMPLETE;
-out_unlock:
-       unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
-                            GFP_NOFS);
 out:
        mutex_unlock(&inode->i_mutex);
        iput(inode);