UBI: preserve corrupted PEBs
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>
Fri, 3 Sep 2010 20:08:15 +0000 (23:08 +0300)
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>
Tue, 19 Oct 2010 14:19:57 +0000 (17:19 +0300)
Currently UBI erases all corrupted eraseblocks, irrespective of the nature
of the corruption: corruption due to power cuts and non-power-cut corruption.
The former case is OK, but the latter is not, because UBI may destroy
potentially important data.

With this patch, during scanning, when UBI hits a PEB with a corrupted VID
header, it checks whether this PEB contains only 0xFF data. If yes, it is
safe to erase this PEB and it is put on the 'erase' list. If not, this may
be important data and it is better to avoid erasing this PEB. Instead,
UBI puts it on the 'corr' list and removes it from the pool of available PEBs.
IOW, UBI preserves this PEB.

Such corrupted PEBs reduce the number of available PEBs. So the more of them
we accumulate, the fewer PEBs are available. The maximum number of non-power
cut corrupted PEBs is 8.

This patch is a response to a UBIFS problem where the reporter
(Matthew L. Creech <mlcreech@gmail.com>) observes that the UBIFS index points
to an unmapped LEB. The theory is that the corresponding PEB somehow got
corrupted and UBI wiped it. This patch (actually a series of patches)
tries to make sure such PEBs are preserved - this would make it easier
to analyze the corruption.

Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
drivers/mtd/ubi/build.c
drivers/mtd/ubi/eba.c
drivers/mtd/ubi/scan.c
drivers/mtd/ubi/ubi.h
drivers/mtd/ubi/vmt.c
drivers/mtd/ubi/vtbl.c
drivers/mtd/ubi/wl.c

index f247c4e..5ebe280 100644 (file)
@@ -591,6 +591,7 @@ static int attach_by_scanning(struct ubi_device *ubi)
 
        ubi->bad_peb_count = si->bad_peb_count;
        ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count;
+       ubi->corr_peb_count = si->corr_peb_count;
        ubi->max_ec = si->max_ec;
        ubi->mean_ec = si->mean_ec;
        ubi_msg("max. sequence number:       %llu", si->max_sqnum);
@@ -972,6 +973,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
        ubi_msg("MTD device size:            %llu MiB", ubi->flash_size >> 20);
        ubi_msg("number of good PEBs:        %d", ubi->good_peb_count);
        ubi_msg("number of bad PEBs:         %d", ubi->bad_peb_count);
+       ubi_msg("number of corrupted PEBs:   %d", ubi->corr_peb_count);
        ubi_msg("max. allowed volumes:       %d", ubi->vtbl_slots);
        ubi_msg("wear-leveling threshold:    %d", CONFIG_MTD_UBI_WL_THRESHOLD);
        ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT);
index 334865e..4be6718 100644 (file)
@@ -1201,6 +1201,9 @@ static void print_rsvd_warning(struct ubi_device *ubi,
 
        ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d,"
                 " need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
+       if (ubi->corr_peb_count)
+               ubi_warn("%d PEBs are corrupted and not used",
+                       ubi->corr_peb_count);
 }
 
 /**
@@ -1263,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
        if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
                ubi_err("no enough physical eraseblocks (%d, need %d)",
                        ubi->avail_pebs, EBA_RESERVED_PEBS);
+               if (ubi->corr_peb_count)
+                       ubi_err("%d PEBs are corrupted and not used",
+                               ubi->corr_peb_count);
                err = -ENOSPC;
                goto out_free;
        }
index def0bf0..30b7102 100644 (file)
@@ -706,8 +706,8 @@ out_free:
 struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
                                           struct ubi_scan_info *si)
 {
-       int err = 0, i;
-       struct ubi_scan_leb *seb;
+       int err = 0;
+       struct ubi_scan_leb *seb, *tmp_seb;
 
        if (!list_empty(&si->free)) {
                seb = list_entry(si->free.next, struct ubi_scan_leb, u.list);
@@ -716,37 +716,27 @@ struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
                return seb;
        }
 
-       for (i = 0; i < 2; i++) {
-               struct list_head *head;
-               struct ubi_scan_leb *tmp_seb;
-
-               if (i == 0)
-                       head = &si->erase;
-               else
-                       head = &si->corr;
-
-               /*
-                * We try to erase the first physical eraseblock from the @head
-                * list and pick it if we succeed, or try to erase the
-                * next one if not. And so forth. We don't want to take care
-                * about bad eraseblocks here - they'll be handled later.
-                */
-               list_for_each_entry_safe(seb, tmp_seb, head, u.list) {
-                       if (seb->ec == UBI_SCAN_UNKNOWN_EC)
-                               seb->ec = si->mean_ec;
+       /*
+        * We try to erase the first physical eraseblock from the erase list
+        * and pick it if we succeed, or try to erase the next one if not. And
+        * so forth. We don't want to take care about bad eraseblocks here -
+        * they'll be handled later.
+        */
+       list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) {
+               if (seb->ec == UBI_SCAN_UNKNOWN_EC)
+                       seb->ec = si->mean_ec;
 
-                       err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
-                       if (err)
-                               continue;
+               err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
+               if (err)
+                       continue;
 
-                       seb->ec += 1;
-                       list_del(&seb->u.list);
-                       dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
-                       return seb;
-               }
+               seb->ec += 1;
+               list_del(&seb->u.list);
+               dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
+               return seb;
        }
 
-       ubi_err("no eraseblocks found");
+       ubi_err("no free eraseblocks");
        return ERR_PTR(-ENOSPC);
 }
 
index 8831d7b..0b0149c 100644 (file)
@@ -361,6 +361,8 @@ struct ubi_wl_entry;
  * @peb_size: physical eraseblock size
  * @bad_peb_count: count of bad physical eraseblocks
  * @good_peb_count: count of good physical eraseblocks
+ * @corr_peb_count: count of corrupted physical eraseblocks (preserved and not
+ *                  used by UBI)
  * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous
  * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks
  * @min_io_size: minimal input/output unit size of the underlying MTD device
@@ -447,6 +449,7 @@ struct ubi_device {
        int peb_size;
        int bad_peb_count;
        int good_peb_count;
+       int corr_peb_count;
        int erroneous_peb_count;
        int max_erroneous;
        int min_io_size;
index e42afab..c47620d 100644 (file)
@@ -261,6 +261,9 @@ int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
        /* Reserve physical eraseblocks */
        if (vol->reserved_pebs > ubi->avail_pebs) {
                dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);
+               if (ubi->corr_peb_count)
+                       dbg_err("%d PEBs are corrupted and not used",
+                               ubi->corr_peb_count);
                err = -ENOSPC;
                goto out_unlock;
        }
@@ -527,6 +530,9 @@ int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
                if (pebs > ubi->avail_pebs) {
                        dbg_err("not enough PEBs: requested %d, available %d",
                                pebs, ubi->avail_pebs);
+                       if (ubi->corr_peb_count)
+                               dbg_err("%d PEBs are corrupted and not used",
+                                       ubi->corr_peb_count);
                        spin_unlock(&ubi->volumes_lock);
                        err = -ENOSPC;
                        goto out_free;
index 3bfe00a..fcdb7f6 100644 (file)
@@ -662,9 +662,13 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
        ubi->vol_count += 1;
        vol->ubi = ubi;
 
-       if (reserved_pebs > ubi->avail_pebs)
+       if (reserved_pebs > ubi->avail_pebs) {
                ubi_err("not enough PEBs, required %d, available %d",
                        reserved_pebs, ubi->avail_pebs);
+               if (ubi->corr_peb_count)
+                       ubi_err("%d PEBs are corrupted and not used",
+                               ubi->corr_peb_count);
+       }
        ubi->rsvd_pebs += reserved_pebs;
        ubi->avail_pebs -= reserved_pebs;
 
@@ -837,7 +841,7 @@ int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
                        return PTR_ERR(ubi->vtbl);
        }
 
-       ubi->avail_pebs = ubi->good_peb_count;
+       ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count;
 
        /*
         * The layout volume is OK, initialize the corresponding in-RAM data
index 605ecb1..655bbbe 100644 (file)
@@ -1478,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
                ubi->lookuptbl[e->pnum] = e;
        }
 
-       list_for_each_entry(seb, &si->corr, u.list) {
-               cond_resched();
-
-               e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
-               if (!e)
-                       goto out_free;
-
-               e->pnum = seb->pnum;
-               e->ec = seb->ec;
-               ubi->lookuptbl[e->pnum] = e;
-               if (schedule_erase(ubi, e, 0)) {
-                       kmem_cache_free(ubi_wl_entry_slab, e);
-                       goto out_free;
-               }
-       }
-
        ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
                ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
                        cond_resched();
@@ -1520,6 +1504,9 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
        if (ubi->avail_pebs < WL_RESERVED_PEBS) {
                ubi_err("no enough physical eraseblocks (%d, need %d)",
                        ubi->avail_pebs, WL_RESERVED_PEBS);
+               if (ubi->corr_peb_count)
+                       ubi_err("%d PEBs are corrupted and not used",
+                               ubi->corr_peb_count);
                goto out_free;
        }
        ubi->avail_pebs -= WL_RESERVED_PEBS;