xfs: refactor log record start detection into a new helper
[cascardo/linux.git] / fs / xfs / xfs_log_recover.c
index c5ecaac..423c36d 100644 (file)
@@ -867,6 +867,79 @@ validate_head:
        return error;
 }
 
+/*
+ * Seek backwards in the log for up to count log record headers, starting at
+ * the block before head_blk and stopping at tail_blk (wrapping around the end
+ * of the physical log if needed). Returns the number of records found or a
+ * negative error code. rblk and rhead receive the block and buffer pointer of
+ * the last record seen and are left untouched if none is found; wrapped is set
+ * when a record is found after wrapping past the start of the physical log.
+ */
+STATIC int
+xlog_rseek_logrec_hdr(
+       struct xlog             *log,
+       xfs_daddr_t             head_blk,
+       xfs_daddr_t             tail_blk,
+       int                     count,
+       struct xfs_buf          *bp,
+       xfs_daddr_t             *rblk,
+       struct xlog_rec_header  **rhead,
+       bool                    *wrapped)
+{
+       int                     i;
+       int                     error;
+       int                     found = 0;
+       char                    *offset = NULL;
+       xfs_daddr_t             end_blk;
+
+       *wrapped = false;
+
+       /*
+        * Walk backwards from the head block until we hit the tail or the first
+        * block in the log.
+        */
+       end_blk = head_blk > tail_blk ? tail_blk : 0;
+       for (i = (int) head_blk - 1; i >= end_blk; i--) {
+               error = xlog_bread(log, i, 1, bp, &offset);
+               if (error)
+                       goto out_error;
+
+               if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+                       *rblk = i;
+                       *rhead = (struct xlog_rec_header *) offset;
+                       if (++found == count)
+                               break;
+               }
+       }
+
+       /*
+        * If we haven't hit the tail block or the log record header count,
+        * start looking again from the end of the physical log. Note that
+        * callers can pass head == tail if the tail is not yet known.
+        */
+       if (tail_blk >= head_blk && found != count) {
+               for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
+                       error = xlog_bread(log, i, 1, bp, &offset);
+                       if (error)
+                               goto out_error;
+
+                       if (*(__be32 *)offset ==
+                           cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+                               *wrapped = true;
+                               *rblk = i;
+                               *rhead = (struct xlog_rec_header *) offset;
+                               if (++found == count)
+                                       break;
+                       }
+               }
+       }
+
+       return found;   /* may be less than count if the search ran out */
+
+out_error:
+       return error;
+}
+
 /*
  * Find the sync block number or the tail of the log.
  *
@@ -898,8 +971,7 @@ xlog_find_tail(
        xfs_daddr_t             after_umount_blk;
        xfs_lsn_t               tail_lsn;
        int                     hblks;
-
-       found = 0;
+       bool                    wrapped = false;
 
        /*
         * Find previous log record
@@ -923,37 +995,16 @@ xlog_find_tail(
        }
 
        /*
-        * Search backwards looking for log record header block
+        * Search backwards through the log looking for the log record header
+        * block. This wraps all the way back around to the head so something is
+        * seriously wrong if we can't find it.
         */
        ASSERT(*head_blk < INT_MAX);
-       for (i = (int)(*head_blk) - 1; i >= 0; i--) {
-               error = xlog_bread(log, i, 1, bp, &offset);
-               if (error)
-                       goto done;
-
-               if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
-                       found = 1;
-                       break;
-               }
-       }
-       /*
-        * If we haven't found the log record header block, start looking
-        * again from the end of the physical log.  XXXmiken: There should be
-        * a check here to make sure we didn't search more than N blocks in
-        * the previous code.
-        */
-       if (!found) {
-               for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
-                       error = xlog_bread(log, i, 1, bp, &offset);
-                       if (error)
-                               goto done;
-
-                       if (*(__be32 *)offset ==
-                           cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
-                               found = 2;
-                               break;
-                       }
-               }
+       found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, &i,
+                                     &rhead, &wrapped);
+       if (found < 0) {
+               error = found;
+               goto done;
        }
        if (!found) {
                xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
@@ -961,9 +1012,6 @@ xlog_find_tail(
                ASSERT(0);
                return -EIO;
        }
-
-       /* find blk_no of tail of log */
-       rhead = (xlog_rec_header_t *)offset;
        *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
        /*
@@ -979,7 +1027,7 @@ xlog_find_tail(
        log->l_prev_block = i;
        log->l_curr_block = (int)*head_blk;
        log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-       if (found == 2)
+       if (wrapped)
                log->l_curr_cycle++;
        atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
        atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
@@ -4118,26 +4166,69 @@ xlog_recover_process_iunlinks(
        mp->m_dmevmask = mp_dmevmask;
 }
 
+STATIC int
+xlog_unpack_data(
+       struct xlog_rec_header  *rhead, /* in: header holding the saved words */
+       char                    *dp,    /* in/out: data, patched per BBSIZE */
+       struct xlog             *log)
+{
+       int                     i, j, k;
+
+       for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
+                 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
+               *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
+               dp += BBSIZE;           /* step to the next basic block */
+       }
+
+       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+               xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
+               for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
+                       j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); /* header */
+                       k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); /* slot */
+                       *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
+                       dp += BBSIZE;
+               }
+       }
+
+       return 0;                       /* no failure path in this helper */
+}
+
 /*
- * Upack the log buffer data and crc check it. If the check fails, issue a
- * warning if and only if the CRC in the header is non-zero. This makes the
- * check an advisory warning, and the zero CRC check will prevent failure
- * warnings from being emitted when upgrading the kernel from one that does not
- * add CRCs by default.
- *
- * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
- * corruption failure
+ * CRC check, unpack and process a log record.
  */
 STATIC int
-xlog_unpack_data_crc(
+xlog_recover_process(
+       struct xlog             *log,
+       struct hlist_head       rhash[],
        struct xlog_rec_header  *rhead,
        char                    *dp,
-       struct xlog             *log)
+       int                     pass)
 {
+       int                     error;
        __le32                  crc;
 
        crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
-       if (crc != rhead->h_crc) {
+
+       /*
+        * Nothing else to do if this is a CRC verification pass. Just return
+        * if this a record with a non-zero crc. Unfortunately, mkfs always
+        * sets h_crc to 0 so we must consider this valid even on v5 supers.
+        * Otherwise, return EFSBADCRC on failure so the callers up the stack
+        * know precisely what failed.
+        */
+       if (pass == XLOG_RECOVER_CRCPASS) {
+               if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
+                       return -EFSBADCRC;
+               return 0;
+       }
+
+       /*
+        * We're in the normal recovery path. Issue a warning if and only if the
+        * CRC in the header is non-zero. This is an advisory warning and the
+        * zero CRC check prevents warnings from being emitted when upgrading
+        * the kernel from one that does not add CRCs by default.
+        */
+       if (crc != le32_to_cpu(rhead->h_crc)) {
                if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
                        xfs_alert(log->l_mp,
                "log record CRC mismatch: found 0x%x, expected 0x%x.",
@@ -4147,47 +4238,18 @@ xlog_unpack_data_crc(
                }
 
                /*
-                * If we've detected a log record corruption, then we can't
-                * recover past this point. Abort recovery if we are enforcing
-                * CRC protection by punting an error back up the stack.
+                * If the filesystem is CRC enabled, this mismatch becomes a
+                * fatal log corruption failure.
                 */
                if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
                        return -EFSCORRUPTED;
        }
 
-       return 0;
-}
-
-STATIC int
-xlog_unpack_data(
-       struct xlog_rec_header  *rhead,
-       char                    *dp,
-       struct xlog             *log)
-{
-       int                     i, j, k;
-       int                     error;
-
-       error = xlog_unpack_data_crc(rhead, dp, log);
+       error = xlog_unpack_data(rhead, dp, log);
        if (error)
                return error;
 
-       for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
-                 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
-               *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
-               dp += BBSIZE;
-       }
-
-       if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-               xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
-               for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
-                       j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
-                       k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
-                       *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
-                       dp += BBSIZE;
-               }
-       }
-
-       return 0;
+       return xlog_recover_process_data(log, rhash, rhead, dp, pass);
 }
 
 STATIC int
@@ -4239,18 +4301,21 @@ xlog_do_recovery_pass(
        struct xlog             *log,
        xfs_daddr_t             head_blk,
        xfs_daddr_t             tail_blk,
-       int                     pass)
+       int                     pass,
+       xfs_daddr_t             *first_bad)     /* out: first bad log rec */
 {
        xlog_rec_header_t       *rhead;
        xfs_daddr_t             blk_no;
+       xfs_daddr_t             rhead_blk;
        char                    *offset;
        xfs_buf_t               *hbp, *dbp;
-       int                     error = 0, h_size;
+       int                     error = 0, h_size, h_len;
        int                     bblks, split_bblks;
        int                     hblks, split_hblks, wrapped_hblks;
        struct hlist_head       rhash[XLOG_RHASH_SIZE];
 
        ASSERT(head_blk != tail_blk);
+       rhead_blk = 0;
 
        /*
         * Read the header of the tail block and get the iclog buffer size from
@@ -4274,7 +4339,31 @@ xlog_do_recovery_pass(
                error = xlog_valid_rec_header(log, rhead, tail_blk);
                if (error)
                        goto bread_err1;
+
+               /*
+                * xfsprogs has a bug where record length is based on lsunit but
+                * h_size (iclog size) is hardcoded to 32k. Now that we
+                * unconditionally CRC verify the unmount record, this means the
+                * log buffer can be too small for the record and cause an
+                * overrun. Detect this here: use lsunit for the buffer size if
+                * this looks like the mkfs case, otherwise fail with an error
+                * (via bread_err1 so that hbp is released).
+                */
                h_size = be32_to_cpu(rhead->h_size);
+               h_len = be32_to_cpu(rhead->h_len);
+               if (h_len > h_size) {
+                       if (h_len <= log->l_mp->m_logbsize &&
+                           be32_to_cpu(rhead->h_num_logops) == 1) {
+                               xfs_warn(log->l_mp,
+               "invalid iclog size (%d bytes), using lsunit (%d bytes)",
+                                        h_size, log->l_mp->m_logbsize);
+                               h_size = log->l_mp->m_logbsize;
+                       } else {
+                               error = -EFSCORRUPTED;
+                               goto bread_err1;
+                       }
+               }
+
                if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
                    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
                        hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -4301,7 +4390,7 @@ xlog_do_recovery_pass(
        }
 
        memset(rhash, 0, sizeof(rhash));
-       blk_no = tail_blk;
+       blk_no = rhead_blk = tail_blk;
        if (tail_blk > head_blk) {
                /*
                 * Perform recovery around the end of the physical log.
@@ -4408,19 +4497,18 @@ xlog_do_recovery_pass(
                                        goto bread_err2;
                        }
 
-                       error = xlog_unpack_data(rhead, offset, log);
+                       error = xlog_recover_process(log, rhash, rhead, offset,
+                                                    pass);
                        if (error)
                                goto bread_err2;
 
-                       error = xlog_recover_process_data(log, rhash,
-                                                       rhead, offset, pass);
-                       if (error)
-                               goto bread_err2;
                        blk_no += bblks;
+                       rhead_blk = blk_no;
                }
 
                ASSERT(blk_no >= log->l_logBBsize);
                blk_no -= log->l_logBBsize;
+               rhead_blk = blk_no;
        }
 
        /* read first part of physical log */
@@ -4441,21 +4529,22 @@ xlog_do_recovery_pass(
                if (error)
                        goto bread_err2;
 
-               error = xlog_unpack_data(rhead, offset, log);
+               error = xlog_recover_process(log, rhash, rhead, offset, pass);
                if (error)
                        goto bread_err2;
 
-               error = xlog_recover_process_data(log, rhash,
-                                               rhead, offset, pass);
-               if (error)
-                       goto bread_err2;
                blk_no += bblks + hblks;
+               rhead_blk = blk_no;
        }
 
  bread_err2:
        xlog_put_bp(dbp);
  bread_err1:
        xlog_put_bp(hbp);
+
+       if (error && first_bad)
+               *first_bad = rhead_blk;
+
        return error;
 }
 
@@ -4493,7 +4582,7 @@ xlog_do_log_recovery(
                INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
 
        error = xlog_do_recovery_pass(log, head_blk, tail_blk,
-                                     XLOG_RECOVER_PASS1);
+                                     XLOG_RECOVER_PASS1, NULL);
        if (error != 0) {
                kmem_free(log->l_buf_cancel_table);
                log->l_buf_cancel_table = NULL;
@@ -4504,7 +4593,7 @@ xlog_do_log_recovery(
         * When it is complete free the table of buf cancel items.
         */
        error = xlog_do_recovery_pass(log, head_blk, tail_blk,
-                                     XLOG_RECOVER_PASS2);
+                                     XLOG_RECOVER_PASS2, NULL);
 #ifdef DEBUG
        if (!error) {
                int     i;