Merge tag 'xfs-for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 7 Sep 2015 20:28:32 +0000 (13:28 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 7 Sep 2015 20:28:32 +0000 (13:28 -0700)
Pull xfs updates from Dave Chinner:
 "There isn't a whole lot to this update - it's mostly bug fixes and
  they are spread pretty much all over XFS.  There are some corruption
  fixes, some fixes for log recovery, some fixes that prevent unount
  from hanging, a lockdep annotation rework for inode locking to prevent
  false positives and the usual random bunch of cleanups and minor
  improvements.

  Deatils:

   - large rework of EFI/EFD lifecycle handling to fix log recovery
     corruption issues, crashes and unmount hangs

   - separate metadata UUID on disk to enable changing boot label UUID
     for v5 filesystems

   - fixes for gcc miscompilation on certain platforms and optimisation
     levels

   - remote attribute allocation and recovery corruption fixes

   - inode lockdep annotation rework to fix bugs with too many
     subclasses

   - directory inode locking changes to prevent lockdep false positives

   - a handful of minor corruption fixes

   - various other small cleanups and bug fixes"

* tag 'xfs-for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (42 commits)
  xfs: fix error gotos in xfs_setattr_nonsize
  xfs: add mssing inode cache attempts counter increment
  xfs: return errors from partial I/O failures to files
  libxfs: bad magic number should set da block buffer error
  xfs: fix non-debug build warnings
  xfs: collapse allocsize and biosize mount option handling
  xfs: Fix file type directory corruption for btree directories
  xfs: lockdep annotations throw warnings on non-debug builds
  xfs: Fix uninitialized return value in xfs_alloc_fix_freelist()
  xfs: inode lockdep annotations broke non-lockdep build
  xfs: flush entire file on dio read/write to cached file
  xfs: Fix xfs_attr_leafblock definition
  libxfs: readahead of dir3 data blocks should use the read verifier
  xfs: stop holding ILOCK over filldir callbacks
  xfs: clean up inode lockdep annotations
  xfs: swap leaf buffer into path struct atomically during path shift
  xfs: relocate sparse inode mount warning
  xfs: dquots should be stamped with sb_meta_uuid
  xfs: log recovery needs to validate against sb_meta_uuid
  xfs: growfs not aware of sb_meta_uuid
  ...

56 files changed:
fs/xfs/Makefile
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_alloc_btree.c
fs/xfs/libxfs/xfs_attr.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_remote.c
fs/xfs/libxfs/xfs_bit.c [new file with mode: 0644]
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap_btree.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_da_btree.c
fs/xfs/libxfs/xfs_da_format.h
fs/xfs/libxfs/xfs_dir2.c
fs/xfs/libxfs/xfs_dir2_block.c
fs/xfs/libxfs/xfs_dir2_data.c
fs/xfs/libxfs/xfs_dir2_leaf.c
fs/xfs/libxfs/xfs_dir2_node.c
fs/xfs/libxfs/xfs_dquot_buf.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc_btree.c
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/libxfs/xfs_symlink_remote.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_bit.c [deleted file]
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_buf_item.h
fs/xfs/xfs_dir2_readdir.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_extfree_item.h
fs/xfs/xfs_file.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log.h
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_super.c
fs/xfs/xfs_symlink.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_extfree.c
fs/xfs/xfs_trans_priv.h

index df68285..a096841 100644 (file)
@@ -33,6 +33,7 @@ xfs-y                         += $(addprefix libxfs/, \
                                   xfs_attr.o \
                                   xfs_attr_leaf.o \
                                   xfs_attr_remote.o \
+                                  xfs_bit.o \
                                   xfs_bmap.o \
                                   xfs_bmap_btree.o \
                                   xfs_btree.o \
@@ -63,7 +64,6 @@ xfs-$(CONFIG_XFS_RT)          += $(addprefix libxfs/, \
 xfs-y                          += xfs_aops.o \
                                   xfs_attr_inactive.o \
                                   xfs_attr_list.o \
-                                  xfs_bit.o \
                                   xfs_bmap_util.o \
                                   xfs_buf.o \
                                   xfs_dir2_readdir.o \
index f9e9ffe..ffad7f2 100644 (file)
@@ -464,7 +464,7 @@ xfs_agfl_verify(
        struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
        int             i;
 
-       if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid))
+       if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
                return false;
        if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
                return false;
@@ -1937,7 +1937,7 @@ xfs_alloc_fix_freelist(
        struct xfs_alloc_arg    targs;  /* local allocation arguments */
        xfs_agblock_t           bno;    /* freelist block */
        xfs_extlen_t            need;   /* total blocks needed in freelist */
-       int                     error;
+       int                     error = 0;
 
        if (!pag->pagf_init) {
                error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
@@ -2260,7 +2260,7 @@ xfs_agf_verify(
        struct xfs_agf  *agf = XFS_BUF_TO_AGF(bp);
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid))
+           !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
 
        if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
index 59d521c..90de071 100644 (file)
@@ -295,7 +295,7 @@ xfs_allocbt_verify(
        case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
                if (!xfs_sb_version_hascrc(&mp->m_sb))
                        return false;
-               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
                        return false;
@@ -313,7 +313,7 @@ xfs_allocbt_verify(
        case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
                if (!xfs_sb_version_hascrc(&mp->m_sb))
                        return false;
-               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
                        return false;
index 3349c9a..ff06557 100644 (file)
@@ -139,6 +139,8 @@ xfs_attr_get(
 
        args.value = value;
        args.valuelen = *valuelenp;
+       /* Entirely possible to look up a name which doesn't exist */
+       args.op_flags = XFS_DA_OP_OKNOENT;
 
        lock_mode = xfs_ilock_attr_map_shared(ip);
        if (!xfs_inode_hasattr(ip))
index e9d401c..33df52d 100644 (file)
@@ -262,7 +262,7 @@ xfs_attr3_leaf_verify(
                if (ichdr.magic != XFS_ATTR3_LEAF_MAGIC)
                        return false;
 
-               if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
                        return false;
@@ -1056,7 +1056,7 @@ xfs_attr3_leaf_create(
 
                hdr3->blkno = cpu_to_be64(bp->b_bn);
                hdr3->owner = cpu_to_be64(dp->i_ino);
-               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
+               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
 
                ichdr.freemap[0].base = sizeof(struct xfs_attr3_leaf_hdr);
        } else {
index dd71403..f38f9bd 100644 (file)
@@ -100,7 +100,7 @@ xfs_attr3_rmt_verify(
                return false;
        if (rmt->rm_magic != cpu_to_be32(XFS_ATTR3_RMT_MAGIC))
                return false;
-       if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_uuid))
+       if (!uuid_equal(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid))
                return false;
        if (be64_to_cpu(rmt->rm_blkno) != bno)
                return false;
@@ -222,7 +222,7 @@ xfs_attr3_rmt_hdr_set(
        rmt->rm_magic = cpu_to_be32(XFS_ATTR3_RMT_MAGIC);
        rmt->rm_offset = cpu_to_be32(offset);
        rmt->rm_bytes = cpu_to_be32(size);
-       uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_uuid);
+       uuid_copy(&rmt->rm_uuid, &mp->m_sb.sb_meta_uuid);
        rmt->rm_owner = cpu_to_be64(ino);
        rmt->rm_blkno = cpu_to_be64(bno);
 
@@ -618,9 +618,8 @@ xfs_attr_rmtval_remove(
 
                xfs_bmap_init(args->flist, args->firstblock);
                error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
-                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
-                                   1, args->firstblock, args->flist,
-                                   &done);
+                                   XFS_BMAPI_ATTRFORK, 1, args->firstblock,
+                                   args->flist, &done);
                if (!error) {
                        error = xfs_bmap_finish(&args->trans, args->flist,
                                                &committed);
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
new file mode 100644 (file)
index 0000000..0e8885a
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#include "xfs.h"
+#include "xfs_log_format.h"
+#include "xfs_bit.h"
+
+/*
+ * XFS bit manipulation routines, used in non-realtime code.
+ */
+
+/*
+ * Return whether bitmap is empty.
+ * Size is number of words in the bitmap, which is padded to word boundary
+ * Returns 1 for empty, 0 for non-empty.
+ */
+int
+xfs_bitmap_empty(uint *map, uint size)
+{
+       uint i;
+       uint ret = 0;
+
+       for (i = 0; i < size; i++) {
+               ret |= map[i];
+       }
+
+       return (ret == 0);
+}
+
+/*
+ * Count the number of contiguous bits set in the bitmap starting with bit
+ * start_bit.  Size is the size of the bitmap in words.
+ */
+int
+xfs_contig_bits(uint *map, uint        size, uint start_bit)
+{
+       uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT);
+       uint result = 0;
+       uint tmp;
+
+       size <<= BIT_TO_WORD_SHIFT;
+
+       ASSERT(start_bit < size);
+       size -= start_bit & ~(NBWORD - 1);
+       start_bit &= (NBWORD - 1);
+       if (start_bit) {
+               tmp = *p++;
+               /* set to one first offset bits prior to start */
+               tmp |= (~0U >> (NBWORD-start_bit));
+               if (tmp != ~0U)
+                       goto found;
+               result += NBWORD;
+               size -= NBWORD;
+       }
+       while (size) {
+               if ((tmp = *p++) != ~0U)
+                       goto found;
+               result += NBWORD;
+               size -= NBWORD;
+       }
+       return result - start_bit;
+found:
+       return result + ffz(tmp) - start_bit;
+}
+
+/*
+ * This takes the bit number to start looking from and
+ * returns the next set bit from there.  It returns -1
+ * if there are no more bits set or the start bit is
+ * beyond the end of the bitmap.
+ *
+ * Size is the number of words, not bytes, in the bitmap.
+ */
+int xfs_next_bit(uint *map, uint size, uint start_bit)
+{
+       uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT);
+       uint result = start_bit & ~(NBWORD - 1);
+       uint tmp;
+
+       size <<= BIT_TO_WORD_SHIFT;
+
+       if (start_bit >= size)
+               return -1;
+       size -= result;
+       start_bit &= (NBWORD - 1);
+       if (start_bit) {
+               tmp = *p++;
+               /* set to zero first offset bits prior to start */
+               tmp &= (~0U << start_bit);
+               if (tmp != 0U)
+                       goto found;
+               result += NBWORD;
+               size -= NBWORD;
+       }
+       while (size) {
+               if ((tmp = *p++) != 0U)
+                       goto found;
+               result += NBWORD;
+               size -= NBWORD;
+       }
+       return -1;
+found:
+       return result + ffs(tmp) - 1;
+}
index 63e05b6..8e2010d 100644 (file)
@@ -5945,6 +5945,7 @@ xfs_bmap_split_extent(
        return xfs_trans_commit(tp);
 
 out:
+       xfs_bmap_cancel(&free_list);
        xfs_trans_cancel(tp);
        return error;
 }
index 2c44c8e..6b0cf65 100644 (file)
@@ -349,7 +349,8 @@ xfs_bmbt_to_bmdr(
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_CRC_MAGIC));
-               ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid));
+               ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid,
+                      &mp->m_sb.sb_meta_uuid));
                ASSERT(rblock->bb_u.l.bb_blkno ==
                       cpu_to_be64(XFS_BUF_DADDR_NULL));
        } else
@@ -647,7 +648,7 @@ xfs_bmbt_verify(
        case cpu_to_be32(XFS_BMAP_CRC_MAGIC):
                if (!xfs_sb_version_hascrc(&mp->m_sb))
                        return false;
-               if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (be64_to_cpu(block->bb_u.l.bb_blkno) != bp->b_bn)
                        return false;
index c72283d..f7d7ee7 100644 (file)
@@ -65,7 +65,8 @@ xfs_btree_check_lblock(
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                lblock_ok = lblock_ok &&
-                       uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
+                       uuid_equal(&block->bb_u.l.bb_uuid,
+                                  &mp->m_sb.sb_meta_uuid) &&
                        block->bb_u.l.bb_blkno == cpu_to_be64(
                                bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
        }
@@ -115,7 +116,8 @@ xfs_btree_check_sblock(
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                sblock_ok = sblock_ok &&
-                       uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
+                       uuid_equal(&block->bb_u.s.bb_uuid,
+                                  &mp->m_sb.sb_meta_uuid) &&
                        block->bb_u.s.bb_blkno == cpu_to_be64(
                                bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
        }
@@ -1000,7 +1002,7 @@ xfs_btree_init_block_int(
                if (flags & XFS_BTREE_CRC_BLOCKS) {
                        buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
                        buf->bb_u.l.bb_owner = cpu_to_be64(owner);
-                       uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
+                       uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
                        buf->bb_u.l.bb_pad = 0;
                        buf->bb_u.l.bb_lsn = 0;
                }
@@ -1013,7 +1015,7 @@ xfs_btree_init_block_int(
                if (flags & XFS_BTREE_CRC_BLOCKS) {
                        buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
                        buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
-                       uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
+                       uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
                        buf->bb_u.s.bb_lsn = 0;
                }
        }
index 2385f8c..be43248 100644 (file)
@@ -146,7 +146,7 @@ xfs_da3_node_verify(
                if (ichdr.magic != XFS_DA3_NODE_MAGIC)
                        return false;
 
-               if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
                        return false;
@@ -233,6 +233,7 @@ xfs_da3_node_read_verify(
                        bp->b_ops->verify_read(bp);
                        return;
                default:
+                       xfs_buf_ioerror(bp, -EFSCORRUPTED);
                        break;
        }
 
@@ -324,7 +325,7 @@ xfs_da3_node_create(
                ichdr.magic = XFS_DA3_NODE_MAGIC;
                hdr3->info.blkno = cpu_to_be64(bp->b_bn);
                hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
-               uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_uuid);
+               uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
        } else {
                ichdr.magic = XFS_DA_NODE_MAGIC;
        }
@@ -1822,6 +1823,7 @@ xfs_da3_path_shift(
        struct xfs_da_args      *args;
        struct xfs_da_node_entry *btree;
        struct xfs_da3_icnode_hdr nodehdr;
+       struct xfs_buf          *bp;
        xfs_dablk_t             blkno = 0;
        int                     level;
        int                     error;
@@ -1866,20 +1868,24 @@ xfs_da3_path_shift(
         */
        for (blk++, level++; level < path->active; blk++, level++) {
                /*
-                * Release the old block.
-                * (if it's dirty, trans won't actually let go)
+                * Read the next child block into a local buffer.
                 */
-               if (release)
-                       xfs_trans_brelse(args->trans, blk->bp);
+               error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp,
+                                         args->whichfork);
+               if (error)
+                       return error;
 
                /*
-                * Read the next child block.
+                * Release the old block (if it's dirty, the trans doesn't
+                * actually let go) and swap the local buffer into the path
+                * structure. This ensures failure of the above read doesn't set
+                * a NULL buffer in an active slot in the path.
                 */
+               if (release)
+                       xfs_trans_brelse(args->trans, blk->bp);
                blk->blkno = blkno;
-               error = xfs_da3_node_read(args->trans, dp, blkno, -1,
-                                       &blk->bp, args->whichfork);
-               if (error)
-                       return error;
+               blk->bp = bp;
+
                info = blk->bp->b_addr;
                ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
                       info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
@@ -2351,8 +2357,8 @@ xfs_da_shrink_inode(
                 * the last block to the place we want to kill.
                 */
                error = xfs_bunmapi(tp, dp, dead_blkno, count,
-                                   xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
-                                   0, args->firstblock, args->flist, &done);
+                                   xfs_bmapi_aflag(w), 0, args->firstblock,
+                                   args->flist, &done);
                if (error == -ENOSPC) {
                        if (w != XFS_DATA_FORK)
                                break;
index 74bcbab..b14bbd6 100644 (file)
@@ -680,8 +680,15 @@ typedef struct xfs_attr_leaf_name_remote {
 typedef struct xfs_attr_leafblock {
        xfs_attr_leaf_hdr_t     hdr;    /* constant-structure header block */
        xfs_attr_leaf_entry_t   entries[1];     /* sorted on key, not name */
-       xfs_attr_leaf_name_local_t namelist;    /* grows from bottom of buf */
-       xfs_attr_leaf_name_remote_t valuelist;  /* grows from bottom of buf */
+       /*
+        * The rest of the block contains the following structures after the
+        * leaf entries, growing from the bottom up. The variables are never
+        * referenced and definining them can actually make gcc optimize away
+        * accesses to the 'entries' array above index 0 so don't do that.
+        *
+        * xfs_attr_leaf_name_local_t namelist;
+        * xfs_attr_leaf_name_remote_t valuelist;
+        */
 } xfs_attr_leafblock_t;
 
 /*
index a69fb3a..9de401d 100644 (file)
@@ -362,6 +362,7 @@ xfs_dir_lookup(
        struct xfs_da_args *args;
        int             rval;
        int             v;              /* type-checking value */
+       int             lock_mode;
 
        ASSERT(S_ISDIR(dp->i_d.di_mode));
        XFS_STATS_INC(xs_dir_lookup);
@@ -387,6 +388,7 @@ xfs_dir_lookup(
        if (ci_name)
                args->op_flags |= XFS_DA_OP_CILOOKUP;
 
+       lock_mode = xfs_ilock_data_map_shared(dp);
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
                rval = xfs_dir2_sf_lookup(args);
                goto out_check_rval;
@@ -419,6 +421,7 @@ out_check_rval:
                }
        }
 out_free:
+       xfs_iunlock(dp, lock_mode);
        kmem_free(args);
        return rval;
 }
@@ -674,25 +677,22 @@ xfs_dir2_shrink_inode(
        mp = dp->i_mount;
        tp = args->trans;
        da = xfs_dir2_db_to_da(args->geo, db);
-       /*
-        * Unmap the fsblock(s).
-        */
-       if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount,
-                       XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,
-                       &done))) {
+
+       /* Unmap the fsblock(s). */
+       error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 0, 0,
+                           args->firstblock, args->flist, &done);
+       if (error) {
                /*
-                * ENOSPC actually can happen if we're in a removename with
-                * no space reservation, and the resulting block removal
-                * would cause a bmap btree split or conversion from extents
-                * to btree.  This can only happen for un-fragmented
-                * directory blocks, since you need to be punching out
-                * the middle of an extent.
-                * In this case we need to leave the block in the file,
-                * and not binval it.
-                * So the block has to be in a consistent empty state
-                * and appropriately logged.
-                * We don't free up the buffer, the caller can tell it
-                * hasn't happened since it got an error back.
+                * ENOSPC actually can happen if we're in a removename with no
+                * space reservation, and the resulting block removal would
+                * cause a bmap btree split or conversion from extents to btree.
+                * This can only happen for un-fragmented directory blocks,
+                * since you need to be punching out the middle of an extent.
+                * In this case we need to leave the block in the file, and not
+                * binval it.  So the block has to be in a consistent empty
+                * state and appropriately logged.  We don't free up the buffer,
+                * the caller can tell it hasn't happened since it got an error
+                * back.
                 */
                return error;
        }
index 9354e19..4778d1d 100644 (file)
@@ -67,7 +67,7 @@ xfs_dir3_block_verify(
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (hdr3->magic != cpu_to_be32(XFS_DIR3_BLOCK_MAGIC))
                        return false;
-               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
                        return false;
@@ -157,7 +157,7 @@ xfs_dir3_block_init(
                hdr3->magic = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
                hdr3->blkno = cpu_to_be64(bp->b_bn);
                hdr3->owner = cpu_to_be64(dp->i_ino);
-               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
+               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
                return;
 
        }
index de1ea16..824131e 100644 (file)
@@ -220,7 +220,7 @@ xfs_dir3_data_verify(
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                if (hdr3->magic != cpu_to_be32(XFS_DIR3_DATA_MAGIC))
                        return false;
-               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
                        return false;
@@ -252,7 +252,8 @@ xfs_dir3_data_reada_verify(
                return;
        case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
        case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
-               xfs_dir3_data_verify(bp);
+               bp->b_ops = &xfs_dir3_data_buf_ops;
+               bp->b_ops->verify_read(bp);
                return;
        default:
                xfs_buf_ioerror(bp, -EFSCORRUPTED);
@@ -604,7 +605,7 @@ xfs_dir3_data_init(
                hdr3->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
                hdr3->blkno = cpu_to_be64(bp->b_bn);
                hdr3->owner = cpu_to_be64(dp->i_ino);
-               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_uuid);
+               uuid_copy(&hdr3->uuid, &mp->m_sb.sb_meta_uuid);
 
        } else
                hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
index 1061199..f300240 100644 (file)
@@ -160,7 +160,7 @@ xfs_dir3_leaf_verify(
 
                if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
                        return false;
-               if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
                        return false;
@@ -310,7 +310,7 @@ xfs_dir3_leaf_init(
                                         : cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);
                leaf3->info.blkno = cpu_to_be64(bp->b_bn);
                leaf3->info.owner = cpu_to_be64(owner);
-               uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_uuid);
+               uuid_copy(&leaf3->info.uuid, &mp->m_sb.sb_meta_uuid);
        } else {
                memset(leaf, 0, sizeof(*leaf));
                leaf->hdr.info.magic = cpu_to_be16(type);
index 41b80d3..cc28e92 100644 (file)
@@ -93,7 +93,7 @@ xfs_dir3_free_verify(
 
                if (hdr3->magic != cpu_to_be32(XFS_DIR3_FREE_MAGIC))
                        return false;
-               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
                        return false;
@@ -226,7 +226,7 @@ xfs_dir3_free_get_buf(
 
                hdr3->hdr.blkno = cpu_to_be64(bp->b_bn);
                hdr3->hdr.owner = cpu_to_be64(dp->i_ino);
-               uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);
+               uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_meta_uuid);
        } else
                hdr.magic = XFS_DIR2_FREE_MAGIC;
        dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr);
@@ -1845,8 +1845,7 @@ xfs_dir2_node_addname_int(
 
                        if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) {
                                xfs_alert(mp,
-                       "%s: dir ino %llu needed freesp block %lld for\n"
-                       "  data block %lld, got %lld ifbno %llu lastfbno %d",
+"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld ifbno %llu lastfbno %d",
                                        __func__, (unsigned long long)dp->i_ino,
                                        (long long)dp->d_ops->db_to_fdb(
                                                                args->geo, dbno),
@@ -2132,6 +2131,7 @@ xfs_dir2_node_replace(
        int                     error;          /* error return value */
        int                     i;              /* btree level */
        xfs_ino_t               inum;           /* new inode number */
+       int                     ftype;          /* new file type */
        xfs_dir2_leaf_t         *leaf;          /* leaf structure */
        xfs_dir2_leaf_entry_t   *lep;           /* leaf entry being changed */
        int                     rval;           /* internal return value */
@@ -2145,7 +2145,14 @@ xfs_dir2_node_replace(
        state = xfs_da_state_alloc();
        state->args = args;
        state->mp = args->dp->i_mount;
+
+       /*
+        * We have to save new inode number and ftype since
+        * xfs_da3_node_lookup_int() is going to overwrite them
+        */
        inum = args->inumber;
+       ftype = args->filetype;
+
        /*
         * Lookup the entry to change in the btree.
         */
@@ -2183,7 +2190,7 @@ xfs_dir2_node_replace(
                 * Fill in the new inode number and log the entry.
                 */
                dep->inumber = cpu_to_be64(inum);
-               args->dp->d_ops->data_put_ftype(dep, args->filetype);
+               args->dp->d_ops->data_put_ftype(dep, ftype);
                xfs_dir2_data_log_entry(args, state->extrablk.bp, dep);
                rval = 0;
        }
index 6fbf2d8..5331b7f 100644 (file)
@@ -163,7 +163,7 @@ xfs_dqcheck(
        d->dd_diskdq.d_id = cpu_to_be32(id);
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
-               uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+               uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
                xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
                                 XFS_DQUOT_CRC_OFF);
        }
@@ -198,7 +198,7 @@ xfs_dquot_buf_verify_crc(
                if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
                                 XFS_DQUOT_CRC_OFF))
                        return false;
-               if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
        }
        return true;
index a0ae572..9590a06 100644 (file)
@@ -100,7 +100,7 @@ typedef struct xfs_sb {
        xfs_rfsblock_t  sb_dblocks;     /* number of data blocks */
        xfs_rfsblock_t  sb_rblocks;     /* number of realtime blocks */
        xfs_rtblock_t   sb_rextents;    /* number of realtime extents */
-       uuid_t          sb_uuid;        /* file system unique id */
+       uuid_t          sb_uuid;        /* user-visible file system unique id */
        xfs_fsblock_t   sb_logstart;    /* starting block of log if internal */
        xfs_ino_t       sb_rootino;     /* root inode number */
        xfs_ino_t       sb_rbmino;      /* bitmap inode for realtime extents */
@@ -174,6 +174,7 @@ typedef struct xfs_sb {
 
        xfs_ino_t       sb_pquotino;    /* project quota inode */
        xfs_lsn_t       sb_lsn;         /* last write sequence */
+       uuid_t          sb_meta_uuid;   /* metadata file system unique id */
 
        /* must be padded to 64 bit alignment */
 } xfs_sb_t;
@@ -190,7 +191,7 @@ typedef struct xfs_dsb {
        __be64          sb_dblocks;     /* number of data blocks */
        __be64          sb_rblocks;     /* number of realtime blocks */
        __be64          sb_rextents;    /* number of realtime extents */
-       uuid_t          sb_uuid;        /* file system unique id */
+       uuid_t          sb_uuid;        /* user-visible file system unique id */
        __be64          sb_logstart;    /* starting block of log if internal */
        __be64          sb_rootino;     /* root inode number */
        __be64          sb_rbmino;      /* bitmap inode for realtime extents */
@@ -260,6 +261,7 @@ typedef struct xfs_dsb {
 
        __be64          sb_pquotino;    /* project quota inode */
        __be64          sb_lsn;         /* last write sequence */
+       uuid_t          sb_meta_uuid;   /* metadata file system unique id */
 
        /* must be padded to 64 bit alignment */
 } xfs_dsb_t;
@@ -458,9 +460,11 @@ xfs_sb_has_ro_compat_feature(
 
 #define XFS_SB_FEAT_INCOMPAT_FTYPE     (1 << 0)        /* filetype in dirent */
 #define XFS_SB_FEAT_INCOMPAT_SPINODES  (1 << 1)        /* sparse inode chunks */
+#define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2)        /* metadata UUID */
 #define XFS_SB_FEAT_INCOMPAT_ALL \
                (XFS_SB_FEAT_INCOMPAT_FTYPE|    \
-                XFS_SB_FEAT_INCOMPAT_SPINODES)
+                XFS_SB_FEAT_INCOMPAT_SPINODES| \
+                XFS_SB_FEAT_INCOMPAT_META_UUID)
 
 #define XFS_SB_FEAT_INCOMPAT_UNKNOWN   ~XFS_SB_FEAT_INCOMPAT_ALL
 static inline bool
@@ -514,6 +518,18 @@ static inline bool xfs_sb_version_hassparseinodes(struct xfs_sb *sbp)
                xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_SPINODES);
 }
 
+/*
+ * XFS_SB_FEAT_INCOMPAT_META_UUID indicates that the metadata UUID
+ * is stored separately from the user-visible UUID; this allows the
+ * user-visible UUID to be changed on V5 filesystems which have a
+ * filesystem UUID stamped into every piece of metadata.
+ */
+static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp)
+{
+       return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) &&
+               (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID);
+}
+
 /*
  * end of superblock version macros
  */
index 66efc70..54deb2d 100644 (file)
@@ -338,7 +338,8 @@ xfs_ialloc_inode_init(
                        if (version == 3) {
                                free->di_ino = cpu_to_be64(ino);
                                ino++;
-                               uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid);
+                               uuid_copy(&free->di_uuid,
+                                         &mp->m_sb.sb_meta_uuid);
                                xfs_dinode_calc_crc(mp, free);
                        } else if (tp) {
                                /* just log the inode core */
@@ -2232,7 +2233,7 @@ xfs_imap_lookup(
        }
 
        xfs_trans_brelse(tp, agbp);
-       xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
        if (error)
                return error;
 
@@ -2500,7 +2501,7 @@ xfs_agi_verify(
        struct xfs_agi  *agi = XFS_BUF_TO_AGI(bp);
 
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
-           !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_uuid))
+           !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
        /*
         * Validate the magic number of the agi block.
index 674ad8f..f39b285 100644 (file)
@@ -239,7 +239,7 @@ xfs_inobt_verify(
        case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
                if (!xfs_sb_version_hascrc(&mp->m_sb))
                        return false;
-               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
+               if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
                        return false;
                if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
                        return false;
index 6526e76..268c00f 100644 (file)
@@ -304,7 +304,7 @@ xfs_dinode_verify(
                return false;
        if (be64_to_cpu(dip->di_ino) != ip->i_ino)
                return false;
-       if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_uuid))
+       if (!uuid_equal(&dip->di_uuid, &mp->m_sb.sb_meta_uuid))
                return false;
        return true;
 }
@@ -366,7 +366,7 @@ xfs_iread(
                if (xfs_sb_version_hascrc(&mp->m_sb)) {
                        ip->i_d.di_version = 3;
                        ip->i_d.di_ino = ip->i_ino;
-                       uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid);
+                       uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid);
                } else
                        ip->i_d.di_version = 2;
                return 0;
index df9851c..4742514 100644 (file)
@@ -131,10 +131,11 @@ xfs_mount_validate_sb(
                if (xfs_sb_has_compat_feature(sbp,
                                        XFS_SB_FEAT_COMPAT_UNKNOWN)) {
                        xfs_warn(mp,
-"Superblock has unknown compatible features (0x%x) enabled.\n"
-"Using a more recent kernel is recommended.",
+"Superblock has unknown compatible features (0x%x) enabled.",
                                (sbp->sb_features_compat &
                                                XFS_SB_FEAT_COMPAT_UNKNOWN));
+                       xfs_warn(mp,
+"Using a more recent kernel is recommended.");
                }
 
                if (xfs_sb_has_ro_compat_feature(sbp,
@@ -145,18 +146,21 @@ xfs_mount_validate_sb(
                                                XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
                        if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
                                xfs_warn(mp,
-"Attempted to mount read-only compatible filesystem read-write.\n"
+"Attempted to mount read-only compatible filesystem read-write.");
+                               xfs_warn(mp,
 "Filesystem can only be safely mounted read only.");
+
                                return -EINVAL;
                        }
                }
                if (xfs_sb_has_incompat_feature(sbp,
                                        XFS_SB_FEAT_INCOMPAT_UNKNOWN)) {
                        xfs_warn(mp,
-"Superblock has unknown incompatible features (0x%x) enabled.\n"
-"Filesystem can not be safely mounted by this kernel.",
+"Superblock has unknown incompatible features (0x%x) enabled.",
                                (sbp->sb_features_incompat &
                                                XFS_SB_FEAT_INCOMPAT_UNKNOWN));
+                       xfs_warn(mp,
+"Filesystem can not be safely mounted by this kernel.");
                        return -EINVAL;
                }
        }
@@ -182,9 +186,6 @@ xfs_mount_validate_sb(
        if (xfs_sb_version_hassparseinodes(sbp)) {
                uint32_t        align;
 
-               xfs_alert(mp,
-       "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
-
                align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize
                                >> sbp->sb_blocklog;
                if (sbp->sb_inoalignmt != align) {
@@ -398,6 +399,14 @@ __xfs_sb_from_disk(
        to->sb_spino_align = be32_to_cpu(from->sb_spino_align);
        to->sb_pquotino = be64_to_cpu(from->sb_pquotino);
        to->sb_lsn = be64_to_cpu(from->sb_lsn);
+       /*
+        * sb_meta_uuid is only on disk if it differs from sb_uuid and the
+        * feature flag is set; if not set we keep it only in memory.
+        */
+       if (xfs_sb_version_hasmetauuid(to))
+               uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
+       else
+               uuid_copy(&to->sb_meta_uuid, &from->sb_uuid);
        /* Convert on-disk flags to in-memory flags? */
        if (convert_xquota)
                xfs_sb_quota_from_disk(to);
@@ -539,6 +548,8 @@ xfs_sb_to_disk(
                                cpu_to_be32(from->sb_features_log_incompat);
                to->sb_spino_align = cpu_to_be32(from->sb_spino_align);
                to->sb_lsn = cpu_to_be64(from->sb_lsn);
+               if (xfs_sb_version_hasmetauuid(from))
+                       uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
        }
 }
 
index e7e26bd..8f8af05 100644 (file)
@@ -63,7 +63,7 @@ xfs_symlink_hdr_set(
        dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
        dsl->sl_offset = cpu_to_be32(offset);
        dsl->sl_bytes = cpu_to_be32(size);
-       uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_uuid);
+       uuid_copy(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid);
        dsl->sl_owner = cpu_to_be64(ino);
        dsl->sl_blkno = cpu_to_be64(bp->b_bn);
        bp->b_ops = &xfs_symlink_buf_ops;
@@ -107,7 +107,7 @@ xfs_symlink_verify(
                return false;
        if (dsl->sl_magic != cpu_to_be32(XFS_SYMLINK_MAGIC))
                return false;
-       if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_uuid))
+       if (!uuid_equal(&dsl->sl_uuid, &mp->m_sb.sb_meta_uuid))
                return false;
        if (bp->b_bn != be64_to_cpu(dsl->sl_blkno))
                return false;
index cc2a321..50ab287 100644 (file)
@@ -353,7 +353,8 @@ xfs_end_bio(
 {
        xfs_ioend_t             *ioend = bio->bi_private;
 
-       ioend->io_error = bio->bi_error;
+       if (!ioend->io_error)
+               ioend->io_error = bio->bi_error;
 
        /* Toss bio and pass work off to an xfsdatad thread */
        bio->bi_private = NULL;
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
deleted file mode 100644 (file)
index 0e8885a..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#include "xfs.h"
-#include "xfs_log_format.h"
-#include "xfs_bit.h"
-
-/*
- * XFS bit manipulation routines, used in non-realtime code.
- */
-
-/*
- * Return whether bitmap is empty.
- * Size is number of words in the bitmap, which is padded to word boundary
- * Returns 1 for empty, 0 for non-empty.
- */
-int
-xfs_bitmap_empty(uint *map, uint size)
-{
-       uint i;
-       uint ret = 0;
-
-       for (i = 0; i < size; i++) {
-               ret |= map[i];
-       }
-
-       return (ret == 0);
-}
-
-/*
- * Count the number of contiguous bits set in the bitmap starting with bit
- * start_bit.  Size is the size of the bitmap in words.
- */
-int
-xfs_contig_bits(uint *map, uint        size, uint start_bit)
-{
-       uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT);
-       uint result = 0;
-       uint tmp;
-
-       size <<= BIT_TO_WORD_SHIFT;
-
-       ASSERT(start_bit < size);
-       size -= start_bit & ~(NBWORD - 1);
-       start_bit &= (NBWORD - 1);
-       if (start_bit) {
-               tmp = *p++;
-               /* set to one first offset bits prior to start */
-               tmp |= (~0U >> (NBWORD-start_bit));
-               if (tmp != ~0U)
-                       goto found;
-               result += NBWORD;
-               size -= NBWORD;
-       }
-       while (size) {
-               if ((tmp = *p++) != ~0U)
-                       goto found;
-               result += NBWORD;
-               size -= NBWORD;
-       }
-       return result - start_bit;
-found:
-       return result + ffz(tmp) - start_bit;
-}
-
-/*
- * This takes the bit number to start looking from and
- * returns the next set bit from there.  It returns -1
- * if there are no more bits set or the start bit is
- * beyond the end of the bitmap.
- *
- * Size is the number of words, not bytes, in the bitmap.
- */
-int xfs_next_bit(uint *map, uint size, uint start_bit)
-{
-       uint * p = ((unsigned int *) map) + (start_bit >> BIT_TO_WORD_SHIFT);
-       uint result = start_bit & ~(NBWORD - 1);
-       uint tmp;
-
-       size <<= BIT_TO_WORD_SHIFT;
-
-       if (start_bit >= size)
-               return -1;
-       size -= result;
-       start_bit &= (NBWORD - 1);
-       if (start_bit) {
-               tmp = *p++;
-               /* set to zero first offset bits prior to start */
-               tmp &= (~0U << start_bit);
-               if (tmp != 0U)
-                       goto found;
-               result += NBWORD;
-               size -= NBWORD;
-       }
-       while (size) {
-               if ((tmp = *p++) != 0U)
-                       goto found;
-               result += NBWORD;
-               size -= NBWORD;
-       }
-       return -1;
-found:
-       return result + ffs(tmp) - 1;
-}
index 0f34886..3bf4ad0 100644 (file)
@@ -67,16 +67,15 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
  */
 int                                            /* error */
 xfs_bmap_finish(
-       xfs_trans_t             **tp,           /* transaction pointer addr */
-       xfs_bmap_free_t         *flist,         /* i/o: list extents to free */
-       int                     *committed)     /* xact committed or not */
+       struct xfs_trans                **tp,   /* transaction pointer addr */
+       struct xfs_bmap_free            *flist, /* i/o: list extents to free */
+       int                             *committed)/* xact committed or not */
 {
-       xfs_efd_log_item_t      *efd;           /* extent free data */
-       xfs_efi_log_item_t      *efi;           /* extent free intention */
-       int                     error;          /* error return value */
-       xfs_bmap_free_item_t    *free;          /* free extent item */
-       xfs_mount_t             *mp;            /* filesystem mount structure */
-       xfs_bmap_free_item_t    *next;          /* next item on free list */
+       struct xfs_efd_log_item         *efd;   /* extent free data */
+       struct xfs_efi_log_item         *efi;   /* extent free intention */
+       int                             error;  /* error return value */
+       struct xfs_bmap_free_item       *free;  /* free extent item */
+       struct xfs_bmap_free_item       *next;  /* next item on free list */
 
        ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
        if (flist->xbf_count == 0) {
@@ -88,40 +87,48 @@ xfs_bmap_finish(
                xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
                        free->xbfi_blockcount);
 
-       error = xfs_trans_roll(tp, NULL);
-       *committed = 1;
-       /*
-        * We have a new transaction, so we should return committed=1,
-        * even though we're returning an error.
-        */
-       if (error)
+       error = __xfs_trans_roll(tp, NULL, committed);
+       if (error) {
+               /*
+                * If the transaction was committed, drop the EFD reference
+                * since we're bailing out of here. The other reference is
+                * dropped when the EFI hits the AIL.
+                *
+                * If the transaction was not committed, the EFI is freed by the
+                * EFI item unlock handler on abort. Also, we have a new
+                * transaction so we should return committed=1 even though we're
+                * returning an error.
+                */
+               if (*committed) {
+                       xfs_efi_release(efi);
+                       xfs_force_shutdown((*tp)->t_mountp,
+                               (error == -EFSCORRUPTED) ?
+                                       SHUTDOWN_CORRUPT_INCORE :
+                                       SHUTDOWN_META_IO_ERROR);
+               } else {
+                       *committed = 1;
+               }
+
                return error;
+       }
 
+       /*
+        * Get an EFD and free each extent in the list, logging to the EFD in
+        * the process. The remaining bmap free list is cleaned up by the caller
+        * on error.
+        */
        efd = xfs_trans_get_efd(*tp, efi, flist->xbf_count);
        for (free = flist->xbf_first; free != NULL; free = next) {
                next = free->xbfi_next;
-               if ((error = xfs_free_extent(*tp, free->xbfi_startblock,
-                               free->xbfi_blockcount))) {
-                       /*
-                        * The bmap free list will be cleaned up at a
-                        * higher level.  The EFI will be canceled when
-                        * this transaction is aborted.
-                        * Need to force shutdown here to make sure it
-                        * happens, since this transaction may not be
-                        * dirty yet.
-                        */
-                       mp = (*tp)->t_mountp;
-                       if (!XFS_FORCED_SHUTDOWN(mp))
-                               xfs_force_shutdown(mp,
-                                                  (error == -EFSCORRUPTED) ?
-                                                  SHUTDOWN_CORRUPT_INCORE :
-                                                  SHUTDOWN_META_IO_ERROR);
+
+               error = xfs_trans_free_extent(*tp, efd, free->xbfi_startblock,
+                                             free->xbfi_blockcount);
+               if (error)
                        return error;
-               }
-               xfs_trans_log_efd_extent(*tp, efd, free->xbfi_startblock,
-                       free->xbfi_blockcount);
+
                xfs_bmap_del_free(flist, NULL, free);
        }
+
        return 0;
 }
 
@@ -1467,7 +1474,7 @@ xfs_shift_file_space(
                                XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
                                XFS_QMOPT_RES_REGBLKS);
                if (error)
-                       goto out;
+                       goto out_trans_cancel;
 
                xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
@@ -1481,18 +1488,20 @@ xfs_shift_file_space(
                                &done, stop_fsb, &first_block, &free_list,
                                direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
                if (error)
-                       goto out;
+                       goto out_bmap_cancel;
 
                error = xfs_bmap_finish(&tp, &free_list, &committed);
                if (error)
-                       goto out;
+                       goto out_bmap_cancel;
 
                error = xfs_trans_commit(tp);
        }
 
        return error;
 
-out:
+out_bmap_cancel:
+       xfs_bmap_cancel(&free_list);
+out_trans_cancel:
        xfs_trans_cancel(tp);
        return error;
 }
index 01bd678..8ecffb3 100644 (file)
@@ -438,7 +438,6 @@ _xfs_buf_find(
        xfs_buf_flags_t         flags,
        xfs_buf_t               *new_bp)
 {
-       size_t                  numbytes;
        struct xfs_perag        *pag;
        struct rb_node          **rbp;
        struct rb_node          *parent;
@@ -450,10 +449,9 @@ _xfs_buf_find(
 
        for (i = 0; i < nmaps; i++)
                numblks += map[i].bm_len;
-       numbytes = BBTOB(numblks);
 
        /* Check for IOs smaller than the sector size / not sector aligned */
-       ASSERT(!(numbytes < btp->bt_meta_sectorsize));
+       ASSERT(!(BBTOB(numblks) < btp->bt_meta_sectorsize));
        ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask));
 
        /*
@@ -1532,9 +1530,10 @@ xfs_wait_buftarg(
                        list_del_init(&bp->b_lru);
                        if (bp->b_flags & XBF_WRITE_FAIL) {
                                xfs_alert(btp->bt_mount,
-"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n"
-"Please run xfs_repair to determine the extent of the problem.",
+"Corruption Alert: Buffer at block 0x%llx had permanent write failures!",
                                        (long long)bp->b_bn);
+                               xfs_alert(btp->bt_mount,
+"Please run xfs_repair to determine the extent of the problem.");
                        }
                        xfs_buf_rele(bp);
                }
index 092d652..7e986da 100644 (file)
@@ -647,11 +647,7 @@ xfs_buf_item_unlock(
                        xfs_buf_item_relse(bp);
                else if (aborted) {
                        ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
-                       if (lip->li_flags & XFS_LI_IN_AIL) {
-                               spin_lock(&lip->li_ailp->xa_lock);
-                               xfs_trans_ail_delete(lip->li_ailp, lip,
-                                                    SHUTDOWN_LOG_IO_ERROR);
-                       }
+                       xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);
                        xfs_buf_item_relse(bp);
                }
        }
@@ -750,13 +746,13 @@ xfs_buf_item_free_format(
  * buffer (see xfs_buf_attach_iodone() below), then put the
  * buf log item at the front.
  */
-void
+int
 xfs_buf_item_init(
-       xfs_buf_t       *bp,
-       xfs_mount_t     *mp)
+       struct xfs_buf  *bp,
+       struct xfs_mount *mp)
 {
-       xfs_log_item_t          *lip = bp->b_fspriv;
-       xfs_buf_log_item_t      *bip;
+       struct xfs_log_item     *lip = bp->b_fspriv;
+       struct xfs_buf_log_item *bip;
        int                     chunks;
        int                     map_size;
        int                     error;
@@ -770,12 +766,11 @@ xfs_buf_item_init(
         */
        ASSERT(bp->b_target->bt_mount == mp);
        if (lip != NULL && lip->li_type == XFS_LI_BUF)
-               return;
+               return 0;
 
        bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
        xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
        bip->bli_buf = bp;
-       xfs_buf_hold(bp);
 
        /*
         * chunks is the number of XFS_BLF_CHUNK size pieces the buffer
@@ -788,6 +783,11 @@ xfs_buf_item_init(
         */
        error = xfs_buf_item_get_format(bip, bp->b_map_count);
        ASSERT(error == 0);
+       if (error) {    /* to stop gcc throwing set-but-unused warnings */
+               kmem_zone_free(xfs_buf_item_zone, bip);
+               return error;
+       }
+
 
        for (i = 0; i < bip->bli_format_count; i++) {
                chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
@@ -807,6 +807,8 @@ xfs_buf_item_init(
        if (bp->b_fspriv)
                bip->bli_item.li_bio_list = bp->b_fspriv;
        bp->b_fspriv = bip;
+       xfs_buf_hold(bp);
+       return 0;
 }
 
 
index 3f3455a..f7eba99 100644 (file)
@@ -61,7 +61,7 @@ typedef struct xfs_buf_log_item {
        struct xfs_buf_log_format __bli_format; /* embedded in-log header */
 } xfs_buf_log_item_t;
 
-void   xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
+int    xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
 void   xfs_buf_item_relse(struct xfs_buf *);
 void   xfs_buf_item_log(xfs_buf_log_item_t *, uint, uint);
 uint   xfs_buf_item_dirty(xfs_buf_log_item_t *);
index 098cd78..a989a9c 100644 (file)
@@ -171,6 +171,7 @@ xfs_dir2_block_getdents(
        int                     wantoff;        /* starting block offset */
        xfs_off_t               cook;
        struct xfs_da_geometry  *geo = args->geo;
+       int                     lock_mode;
 
        /*
         * If the block number in the offset is out of range, we're done.
@@ -178,7 +179,9 @@ xfs_dir2_block_getdents(
        if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)
                return 0;
 
+       lock_mode = xfs_ilock_data_map_shared(dp);
        error = xfs_dir3_block_read(NULL, dp, &bp);
+       xfs_iunlock(dp, lock_mode);
        if (error)
                return error;
 
@@ -529,9 +532,12 @@ xfs_dir2_leaf_getdents(
                 * current buffer, need to get another one.
                 */
                if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
+                       int     lock_mode;
 
+                       lock_mode = xfs_ilock_data_map_shared(dp);
                        error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,
                                                      &curoff, &bp);
+                       xfs_iunlock(dp, lock_mode);
                        if (error || !map_info->map_valid)
                                break;
 
@@ -653,7 +659,6 @@ xfs_readdir(
        struct xfs_da_args      args = { NULL };
        int                     rval;
        int                     v;
-       uint                    lock_mode;
 
        trace_xfs_readdir(dp);
 
@@ -666,7 +671,7 @@ xfs_readdir(
        args.dp = dp;
        args.geo = dp->i_mount->m_dir_geo;
 
-       lock_mode = xfs_ilock_data_map_shared(dp);
+       xfs_ilock(dp, XFS_IOLOCK_SHARED);
        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
                rval = xfs_dir2_sf_getdents(&args, ctx);
        else if ((rval = xfs_dir2_isblock(&args, &v)))
@@ -675,7 +680,7 @@ xfs_readdir(
                rval = xfs_dir2_block_getdents(&args, ctx);
        else
                rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize);
-       xfs_iunlock(dp, lock_mode);
+       xfs_iunlock(dp, XFS_IOLOCK_SHARED);
 
        return rval;
 }
index 4143dc7..30cb3af 100644 (file)
@@ -251,7 +251,7 @@ xfs_qm_init_dquot_blk(
                d->dd_diskdq.d_id = cpu_to_be32(curid);
                d->dd_diskdq.d_flags = type;
                if (xfs_sb_version_hascrc(&mp->m_sb)) {
-                       uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+                       uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid);
                        xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
                                         XFS_DQUOT_CRC_OFF);
                }
@@ -954,12 +954,8 @@ xfs_qm_dqflush(
                struct xfs_log_item     *lip = &dqp->q_logitem.qli_item;
                dqp->dq_flags &= ~XFS_DQ_DIRTY;
 
-               spin_lock(&mp->m_ail->xa_lock);
-               if (lip->li_flags & XFS_LI_IN_AIL)
-                       xfs_trans_ail_delete(mp->m_ail, lip,
-                                            SHUTDOWN_CORRUPT_INCORE);
-               else
-                       spin_unlock(&mp->m_ail->xa_lock);
+               xfs_trans_ail_remove(lip, SHUTDOWN_CORRUPT_INCORE);
+
                error = -EIO;
                goto out_unlock;
        }
index adc8f8f..4aa0153 100644 (file)
@@ -46,28 +46,6 @@ xfs_efi_item_free(
                kmem_zone_free(xfs_efi_zone, efip);
 }
 
-/*
- * Freeing the efi requires that we remove it from the AIL if it has already
- * been placed there. However, the EFI may not yet have been placed in the AIL
- * when called by xfs_efi_release() from EFD processing due to the ordering of
- * committed vs unpin operations in bulk insert operations. Hence the reference
- * count to ensure only the last caller frees the EFI.
- */
-STATIC void
-__xfs_efi_release(
-       struct xfs_efi_log_item *efip)
-{
-       struct xfs_ail          *ailp = efip->efi_item.li_ailp;
-
-       if (atomic_dec_and_test(&efip->efi_refcount)) {
-               spin_lock(&ailp->xa_lock);
-               /* xfs_trans_ail_delete() drops the AIL lock. */
-               xfs_trans_ail_delete(ailp, &efip->efi_item,
-                                    SHUTDOWN_LOG_IO_ERROR);
-               xfs_efi_item_free(efip);
-       }
-}
-
 /*
  * This returns the number of iovecs needed to log the given efi item.
  * We only need 1 iovec for an efi item.  It just logs the efi_log_format
@@ -128,12 +106,12 @@ xfs_efi_item_pin(
 }
 
 /*
- * While EFIs cannot really be pinned, the unpin operation is the last place at
- * which the EFI is manipulated during a transaction.  If we are being asked to
- * remove the EFI it's because the transaction has been cancelled and by
- * definition that means the EFI cannot be in the AIL so remove it from the
- * transaction and free it.  Otherwise coordinate with xfs_efi_release()
- * to determine who gets to free the EFI.
+ * The unpin operation is the last place an EFI is manipulated in the log. It is
+ * either inserted in the AIL or aborted in the event of a log I/O error. In
+ * either case, the EFI transaction has been successfully committed to make it
+ * this far. Therefore, we expect whoever committed the EFI to either construct
+ * and commit the EFD or drop the EFD's reference in the event of error. Simply
+ * drop the log's EFI reference now that the log is done with it.
  */
 STATIC void
 xfs_efi_item_unpin(
@@ -141,15 +119,7 @@ xfs_efi_item_unpin(
        int                     remove)
 {
        struct xfs_efi_log_item *efip = EFI_ITEM(lip);
-
-       if (remove) {
-               ASSERT(!(lip->li_flags & XFS_LI_IN_AIL));
-               if (lip->li_desc)
-                       xfs_trans_del_item(lip);
-               xfs_efi_item_free(efip);
-               return;
-       }
-       __xfs_efi_release(efip);
+       xfs_efi_release(efip);
 }
 
 /*
@@ -167,6 +137,11 @@ xfs_efi_item_push(
        return XFS_ITEM_PINNED;
 }
 
+/*
+ * The EFI has been either committed or aborted if the transaction has been
+ * cancelled. If the transaction was cancelled, an EFD isn't going to be
+ * constructed and thus we free the EFI here directly.
+ */
 STATIC void
 xfs_efi_item_unlock(
        struct xfs_log_item     *lip)
@@ -301,23 +276,19 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
 }
 
 /*
- * This is called by the efd item code below to release references to the given
- * efi item.  Each efd calls this with the number of extents that it has
- * logged, and when the sum of these reaches the total number of extents logged
- * by this efi item we can free the efi item.
+ * Freeing the efi requires that we remove it from the AIL if it has already
+ * been placed there. However, the EFI may not yet have been placed in the AIL
+ * when called by xfs_efi_release() from EFD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the EFI.
  */
 void
-xfs_efi_release(xfs_efi_log_item_t     *efip,
-               uint                    nextents)
+xfs_efi_release(
+       struct xfs_efi_log_item *efip)
 {
-       ASSERT(atomic_read(&efip->efi_next_extent) >= nextents);
-       if (atomic_sub_and_test(nextents, &efip->efi_next_extent)) {
-               /* recovery needs us to drop the EFI reference, too */
-               if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
-                       __xfs_efi_release(efip);
-
-               __xfs_efi_release(efip);
-               /* efip may now have been freed, do not reference it again. */
+       if (atomic_dec_and_test(&efip->efi_refcount)) {
+               xfs_trans_ail_remove(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR);
+               xfs_efi_item_free(efip);
        }
 }
 
@@ -415,20 +386,27 @@ xfs_efd_item_push(
        return XFS_ITEM_PINNED;
 }
 
+/*
+ * The EFD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the EFI and free the EFD.
+ */
 STATIC void
 xfs_efd_item_unlock(
        struct xfs_log_item     *lip)
 {
-       if (lip->li_flags & XFS_LI_ABORTED)
-               xfs_efd_item_free(EFD_ITEM(lip));
+       struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
+       if (lip->li_flags & XFS_LI_ABORTED) {
+               xfs_efi_release(efdp->efd_efip);
+               xfs_efd_item_free(efdp);
+       }
 }
 
 /*
- * When the efd item is committed to disk, all we need to do
- * is delete our reference to our partner efi item and then
- * free ourselves.  Since we're freeing ourselves we must
- * return -1 to keep the transaction code from further referencing
- * this item.
+ * When the efd item is committed to disk, all we need to do is delete our
+ * reference to our partner efi item and then free ourselves. Since we're
+ * freeing ourselves we must return -1 to keep the transaction code from further
+ * referencing this item.
  */
 STATIC xfs_lsn_t
 xfs_efd_item_committed(
@@ -438,13 +416,14 @@ xfs_efd_item_committed(
        struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
 
        /*
-        * If we got a log I/O error, it's always the case that the LR with the
-        * EFI got unpinned and freed before the EFD got aborted.
+        * Drop the EFI reference regardless of whether the EFD has been
+        * aborted. Once the EFD transaction is constructed, it is the sole
+        * responsibility of the EFD to release the EFI (even if the EFI is
+        * aborted due to log I/O error).
         */
-       if (!(lip->li_flags & XFS_LI_ABORTED))
-               xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents);
-
+       xfs_efi_release(efdp->efd_efip);
        xfs_efd_item_free(efdp);
+
        return (xfs_lsn_t)-1;
 }
 
index 0ffbce3..8fa8651 100644 (file)
@@ -39,9 +39,28 @@ struct kmem_zone;
  * "extent free done" log item described below.
  *
  * The EFI is reference counted so that it is not freed prior to both the EFI
- * and EFD being committed and unpinned. This ensures that when the last
- * reference goes away the EFI will always be in the AIL as it has been
- * unpinned, regardless of whether the EFD is processed before or after the EFI.
+ * and EFD being committed and unpinned. This ensures the EFI is inserted into
+ * the AIL even in the event of out of order EFI/EFD processing. In other words,
+ * an EFI is born with two references:
+ *
+ *     1.) an EFI held reference to track EFI AIL insertion
+ *     2.) an EFD held reference to track EFD commit
+ *
+ * On allocation, both references are the responsibility of the caller. Once the
+ * EFI is added to and dirtied in a transaction, ownership of reference one
+ * transfers to the transaction. The reference is dropped once the EFI is
+ * inserted to the AIL or in the event of failure along the way (e.g., commit
+ * failure, log I/O error, etc.). Note that the caller remains responsible for
+ * the EFD reference under all circumstances to this point. The caller has no
+ * means to detect failure once the transaction is committed, however.
+ * Therefore, an EFD is required after this point, even in the event of
+ * unrelated failure.
+ *
+ * Once an EFD is allocated and dirtied in a transaction, reference two
+ * transfers to the transaction. The EFD reference is dropped once it reaches
+ * the unpin handler. Similar to the EFI, the reference also drops in the event
+ * of commit failure or log I/O errors. Note that the EFD is not inserted in the
+ * AIL, so at this point both the EFI and EFD are freed.
  */
 typedef struct xfs_efi_log_item {
        xfs_log_item_t          efi_item;
@@ -77,5 +96,6 @@ xfs_efd_log_item_t    *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
 int                    xfs_efi_copy_format(xfs_log_iovec_t *buf,
                                            xfs_efi_log_format_t *dst_efi_fmt);
 void                   xfs_efi_item_free(xfs_efi_log_item_t *);
+void                   xfs_efi_release(struct xfs_efi_log_item *);
 
 #endif /* __XFS_EXTFREE_ITEM_H__ */
index db4acc1..de2c237 100644 (file)
@@ -317,24 +317,33 @@ xfs_file_read_iter(
                return -EIO;
 
        /*
-        * Locking is a bit tricky here. If we take an exclusive lock
-        * for direct IO, we effectively serialise all new concurrent
-        * read IO to this file and block it behind IO that is currently in
-        * progress because IO in progress holds the IO lock shared. We only
-        * need to hold the lock exclusive to blow away the page cache, so
-        * only take lock exclusively if the page cache needs invalidation.
-        * This allows the normal direct IO case of no page cache pages to
-        * proceeed concurrently without serialisation.
+        * Locking is a bit tricky here. If we take an exclusive lock for direct
+        * IO, we effectively serialise all new concurrent read IO to this file
+        * and block it behind IO that is currently in progress because IO in
+        * progress holds the IO lock shared. We only need to hold the lock
+        * exclusive to blow away the page cache, so only take lock exclusively
+        * if the page cache needs invalidation. This allows the normal direct
+        * IO case of no page cache pages to proceeed concurrently without
+        * serialisation.
         */
        xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
        if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
                xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
                xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
 
+               /*
+                * The generic dio code only flushes the range of the particular
+                * I/O. Because we take an exclusive lock here, this whole
+                * sequence is considerably more expensive for us. This has a
+                * noticeable performance impact for any file with cached pages,
+                * even when outside of the range of the particular I/O.
+                *
+                * Hence, amortize the cost of the lock against a full file
+                * flush and reduce the chances of repeated iolock cycles going
+                * forward.
+                */
                if (inode->i_mapping->nrpages) {
-                       ret = filemap_write_and_wait_range(
-                                                       VFS_I(ip)->i_mapping,
-                                                       pos, pos + size - 1);
+                       ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
                        if (ret) {
                                xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
                                return ret;
@@ -345,9 +354,7 @@ xfs_file_read_iter(
                         * we fail to invalidate a page, but this should never
                         * happen on XFS. Warn if it does fail.
                         */
-                       ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-                                       pos >> PAGE_CACHE_SHIFT,
-                                       (pos + size - 1) >> PAGE_CACHE_SHIFT);
+                       ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
                        WARN_ON_ONCE(ret);
                        ret = 0;
                }
@@ -733,19 +740,19 @@ xfs_file_dio_aio_write(
        pos = iocb->ki_pos;
        end = pos + count - 1;
 
+       /*
+        * See xfs_file_read_iter() for why we do a full-file flush here.
+        */
        if (mapping->nrpages) {
-               ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                                  pos, end);
+               ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
                if (ret)
                        goto out;
                /*
-                * Invalidate whole pages. This can return an error if
-                * we fail to invalidate a page, but this should never
-                * happen on XFS. Warn if it does fail.
+                * Invalidate whole pages. This can return an error if we fail
+                * to invalidate a page, but this should never happen on XFS.
+                * Warn if it does fail.
                 */
-               ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-                                       pos >> PAGE_CACHE_SHIFT,
-                                       end >> PAGE_CACHE_SHIFT);
+               ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
                WARN_ON_ONCE(ret);
                ret = 0;
        }
index 9b3438a..ee3aaa0 100644 (file)
@@ -250,7 +250,7 @@ xfs_growfs_data_private(
                agf->agf_freeblks = cpu_to_be32(tmpsize);
                agf->agf_longest = cpu_to_be32(tmpsize);
                if (xfs_sb_version_hascrc(&mp->m_sb))
-                       uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_uuid);
+                       uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
 
                error = xfs_bwrite(bp);
                xfs_buf_relse(bp);
@@ -273,7 +273,7 @@ xfs_growfs_data_private(
                if (xfs_sb_version_hascrc(&mp->m_sb)) {
                        agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
                        agfl->agfl_seqno = cpu_to_be32(agno);
-                       uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
+                       uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
                }
 
                agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
@@ -309,7 +309,7 @@ xfs_growfs_data_private(
                agi->agi_newino = cpu_to_be32(NULLAGINO);
                agi->agi_dirino = cpu_to_be32(NULLAGINO);
                if (xfs_sb_version_hascrc(&mp->m_sb))
-                       uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid);
+                       uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
                if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
                        agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
                        agi->agi_free_level = cpu_to_be32(1);
index 76a9f27..0a326bd 100644 (file)
@@ -412,6 +412,8 @@ xfs_iget(
        if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
                return -EINVAL;
 
+       XFS_STATS_INC(xs_ig_attempts);
+
        /* get the perag structure and ensure that it's inode capable */
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
        agino = XFS_INO_TO_AGINO(mp, ino);
index 3da9f4d..dc40a6d 100644 (file)
@@ -164,7 +164,7 @@ xfs_ilock(
               (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 
        if (lock_flags & XFS_IOLOCK_EXCL)
                mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
@@ -212,7 +212,7 @@ xfs_ilock_nowait(
               (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
 
        if (lock_flags & XFS_IOLOCK_EXCL) {
                if (!mrtryupdate(&ip->i_iolock))
@@ -281,7 +281,7 @@ xfs_iunlock(
               (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+       ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
        ASSERT(lock_flags != 0);
 
        if (lock_flags & XFS_IOLOCK_EXCL)
@@ -362,32 +362,58 @@ int xfs_lots_retries;
 int xfs_lock_delays;
 #endif
 
+/*
+ * xfs_lockdep_subclass_ok() is only used in an ASSERT, so is only called when
+ * DEBUG or XFS_WARN is set. And MAX_LOCKDEP_SUBCLASSES is then only defined
+ * when CONFIG_LOCKDEP is set. Hence the complex define below to avoid build
+ * errors and warnings.
+ */
+#if (defined(DEBUG) || defined(XFS_WARN)) && defined(CONFIG_LOCKDEP)
+static bool
+xfs_lockdep_subclass_ok(
+       int subclass)
+{
+       return subclass < MAX_LOCKDEP_SUBCLASSES;
+}
+#else
+#define xfs_lockdep_subclass_ok(subclass)      (true)
+#endif
+
 /*
  * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
- * value. This shouldn't be called for page fault locking, but we also need to
- * ensure we don't overrun the number of lockdep subclasses for the iolock or
- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
+ * value. This can be called for any type of inode lock combination, including
+ * parent locking. Care must be taken to ensure we don't overrun the subclass
+ * storage fields in the class mask we build.
  */
 static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
+       int     class = 0;
+
+       ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
+                             XFS_ILOCK_RTSUM)));
+       ASSERT(xfs_lockdep_subclass_ok(subclass));
+
        if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
-               ASSERT(subclass + XFS_LOCK_INUMORDER <
-                       (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
-               lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+               ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
+               ASSERT(xfs_lockdep_subclass_ok(subclass +
+                                               XFS_IOLOCK_PARENT_VAL));
+               class += subclass << XFS_IOLOCK_SHIFT;
+               if (lock_mode & XFS_IOLOCK_PARENT)
+                       class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
        }
 
        if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
-               ASSERT(subclass + XFS_LOCK_INUMORDER <
-                       (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
-               lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
-                                                       XFS_MMAPLOCK_SHIFT;
+               ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
+               class += subclass << XFS_MMAPLOCK_SHIFT;
        }
 
-       if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
-               lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+       if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
+               ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
+               class += subclass << XFS_ILOCK_SHIFT;
+       }
 
-       return lock_mode;
+       return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
 }
 
 /*
@@ -399,6 +425,11 @@ xfs_lock_inumorder(int lock_mode, int subclass)
  * transaction (such as truncate). This can result in deadlock since the long
  * running trans might need to wait for the inode we just locked in order to
  * push the tail and free space in the log.
+ *
+ * xfs_lock_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
  */
 void
 xfs_lock_inodes(
@@ -409,8 +440,29 @@ xfs_lock_inodes(
        int             attempts = 0, i, j, try_lock;
        xfs_log_item_t  *lp;
 
-       /* currently supports between 2 and 5 inodes */
+       /*
+        * Currently supports between 2 and 5 inodes with exclusive locking.  We
+        * support an arbitrary depth of locking here, but absolute limits on
+        * inodes depend on the the type of locking and the limits placed by
+        * lockdep annotations in xfs_lock_inumorder.  These are all checked by
+        * the asserts.
+        */
        ASSERT(ips && inodes >= 2 && inodes <= 5);
+       ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
+                           XFS_ILOCK_EXCL));
+       ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
+                             XFS_ILOCK_SHARED)));
+       ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
+               inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
+       ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
+               inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
+       ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
+               inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
+
+       if (lock_mode & XFS_IOLOCK_EXCL) {
+               ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
+       } else if (lock_mode & XFS_MMAPLOCK_EXCL)
+               ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
 
        try_lock = 0;
        i = 0;
@@ -629,30 +681,29 @@ xfs_lookup(
 {
        xfs_ino_t               inum;
        int                     error;
-       uint                    lock_mode;
 
        trace_xfs_lookup(dp, name);
 
        if (XFS_FORCED_SHUTDOWN(dp->i_mount))
                return -EIO;
 
-       lock_mode = xfs_ilock_data_map_shared(dp);
+       xfs_ilock(dp, XFS_IOLOCK_SHARED);
        error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
-       xfs_iunlock(dp, lock_mode);
-
        if (error)
-               goto out;
+               goto out_unlock;
 
        error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp);
        if (error)
                goto out_free_name;
 
+       xfs_iunlock(dp, XFS_IOLOCK_SHARED);
        return 0;
 
 out_free_name:
        if (ci_name)
                kmem_free(ci_name->name);
-out:
+out_unlock:
+       xfs_iunlock(dp, XFS_IOLOCK_SHARED);
        *ipp = NULL;
        return error;
 }
@@ -787,7 +838,7 @@ xfs_ialloc(
 
        if (ip->i_d.di_version == 3) {
                ASSERT(ip->i_d.di_ino == ino);
-               ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid));
+               ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
                ip->i_d.di_crc = 0;
                ip->i_d.di_changecount = 1;
                ip->i_d.di_lsn = 0;
@@ -1149,7 +1200,8 @@ xfs_create(
                goto out_trans_cancel;
 
 
-       xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+       xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
+                     XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
        unlock_dp_on_error = true;
 
        xfs_bmap_init(&free_list, &first_block);
@@ -1175,11 +1227,8 @@ xfs_create(
         */
        error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
                               prid, resblks > 0, &ip, &committed);
-       if (error) {
-               if (error == -ENOSPC)
-                       goto out_trans_cancel;
+       if (error)
                goto out_trans_cancel;
-       }
 
        /*
         * Now we join the directory inode to the transaction.  We do not do it
@@ -1188,7 +1237,7 @@ xfs_create(
         * the transaction cancel unlocking dp so don't do it explicitly in the
         * error path.
         */
-       xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        unlock_dp_on_error = false;
 
        error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1261,7 +1310,7 @@ xfs_create(
        xfs_qm_dqrele(pdqp);
 
        if (unlock_dp_on_error)
-               xfs_iunlock(dp, XFS_ILOCK_EXCL);
+               xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        return error;
 }
 
@@ -1318,11 +1367,8 @@ xfs_create_tmpfile(
 
        error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
                                prid, resblks > 0, &ip, NULL);
-       if (error) {
-               if (error == -ENOSPC)
-                       goto out_trans_cancel;
+       if (error)
                goto out_trans_cancel;
-       }
 
        if (mp->m_flags & XFS_MOUNT_WSYNC)
                xfs_trans_set_sync(tp);
@@ -1409,10 +1455,11 @@ xfs_link(
        if (error)
                goto error_return;
 
+       xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
        xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
 
        xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
-       xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, tdp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
 
        /*
         * If we are using project inheritance, we only allow hard link
@@ -1791,14 +1838,15 @@ xfs_inactive_ifree(
        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
 
        /*
-        * Just ignore errors at this point.  There is nothing we can
-        * do except to try to keep going. Make sure it's not a silent
-        * error.
+        * Just ignore errors at this point.  There is nothing we can do except
+        * to try to keep going. Make sure it's not a silent error.
         */
        error = xfs_bmap_finish(&tp,  &free_list, &committed);
-       if (error)
+       if (error) {
                xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
                        __func__, error);
+               xfs_bmap_cancel(&free_list);
+       }
        error = xfs_trans_commit(tp);
        if (error)
                xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
@@ -2515,9 +2563,10 @@ xfs_remove(
                goto out_trans_cancel;
        }
 
+       xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
        xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
 
-       xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
        /*
@@ -2898,6 +2947,12 @@ xfs_rename(
         * whether the target directory is the same as the source
         * directory, we can lock from 2 to 4 inodes.
         */
+       if (!new_parent)
+               xfs_ilock(src_dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
+       else
+               xfs_lock_two_inodes(src_dp, target_dp,
+                                   XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
+
        xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
 
        /*
@@ -2905,9 +2960,9 @@ xfs_rename(
         * we can rely on either trans_commit or trans_cancel to unlock
         * them.
         */
-       xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, src_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        if (new_parent)
-               xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
+               xfs_trans_ijoin(tp, target_dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
        if (target_ip)
                xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
index 8f22d20..ca9e119 100644 (file)
@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  * Flags for lockdep annotations.
  *
  * XFS_LOCK_PARENT - for directory operations that require locking a
- * parent directory inode and a child entry inode.  The parent gets locked
- * with this flag so it gets a lockdep subclass of 1 and the child entry
- * lock will have a lockdep subclass of 0.
+ * parent directory inode and a child entry inode. IOLOCK requires nesting,
+ * MMAPLOCK does not support this class, ILOCK requires a single subclass
+ * to differentiate parent from child.
  *
  * XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
  * inodes do not participate in the normal lock order, and thus have their
@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  * XFS_LOCK_INUMORDER - for locking several inodes at the some time
  * with xfs_lock_inodes().  This flag is used as the starting subclass
  * and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 4, the
- * second lock will have a lockdep subclass of 5, and so on. It is
- * the responsibility of the class builder to shift this to the correct
- * portion of the lock_mode lockdep mask.
+ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
+ * limited to the subclasses we can represent via nesting. We need at least
+ * 5 inodes nest depth for the ILOCK through rename, and we also have to support
+ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
+ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
+ * 8 subclasses supported by lockdep.
+ *
+ * This also means we have to number the sub-classes in the lowest bits of
+ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
+ * mask and we can't use bit-masking to build the subclasses. What a mess.
+ *
+ * Bit layout:
+ *
+ * Bit         Lock Region
+ * 16-19       XFS_IOLOCK_SHIFT dependencies
+ * 20-23       XFS_MMAPLOCK_SHIFT dependencies
+ * 24-31       XFS_ILOCK_SHIFT dependencies
+ *
+ * IOLOCK values
+ *
+ * 0-3         subclass value
+ * 4-7         PARENT subclass values
+ *
+ * MMAPLOCK values
+ *
+ * 0-3         subclass value
+ * 4-7         unused
+ *
+ * ILOCK values
+ * 0-4         subclass values
+ * 5           PARENT subclass (not nestable)
+ * 6           RTBITMAP subclass (not nestable)
+ * 7           RTSUM subclass (not nestable)
+ * 
  */
-#define XFS_LOCK_PARENT                1
-#define XFS_LOCK_RTBITMAP      2
-#define XFS_LOCK_RTSUM         3
-#define XFS_LOCK_INUMORDER     4
-
-#define XFS_IOLOCK_SHIFT       16
-#define        XFS_IOLOCK_PARENT       (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
-
-#define XFS_MMAPLOCK_SHIFT     20
-
-#define XFS_ILOCK_SHIFT                24
-#define        XFS_ILOCK_PARENT        (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
-#define        XFS_ILOCK_RTBITMAP      (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
-#define        XFS_ILOCK_RTSUM         (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
-
-#define XFS_IOLOCK_DEP_MASK    0x000f0000
-#define XFS_MMAPLOCK_DEP_MASK  0x00f00000
-#define XFS_ILOCK_DEP_MASK     0xff000000
-#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | \
+#define XFS_IOLOCK_SHIFT               16
+#define XFS_IOLOCK_PARENT_VAL          4
+#define XFS_IOLOCK_MAX_SUBCLASS                (XFS_IOLOCK_PARENT_VAL - 1)
+#define XFS_IOLOCK_DEP_MASK            0x000f0000
+#define        XFS_IOLOCK_PARENT               (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
+
+#define XFS_MMAPLOCK_SHIFT             20
+#define XFS_MMAPLOCK_NUMORDER          0
+#define XFS_MMAPLOCK_MAX_SUBCLASS      3
+#define XFS_MMAPLOCK_DEP_MASK          0x00f00000
+
+#define XFS_ILOCK_SHIFT                        24
+#define XFS_ILOCK_PARENT_VAL           5
+#define XFS_ILOCK_MAX_SUBCLASS         (XFS_ILOCK_PARENT_VAL - 1)
+#define XFS_ILOCK_RTBITMAP_VAL         6
+#define XFS_ILOCK_RTSUM_VAL            7
+#define XFS_ILOCK_DEP_MASK             0xff000000
+#define        XFS_ILOCK_PARENT                (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
+#define        XFS_ILOCK_RTBITMAP              (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
+#define        XFS_ILOCK_RTSUM                 (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
+
+#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \
                                 XFS_MMAPLOCK_DEP_MASK | \
                                 XFS_ILOCK_DEP_MASK)
 
index bf13a5a..62bd80f 100644 (file)
@@ -703,17 +703,10 @@ xfs_iflush_abort(
        xfs_inode_log_item_t    *iip = ip->i_itemp;
 
        if (iip) {
-               struct xfs_ail  *ailp = iip->ili_item.li_ailp;
                if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
-                       spin_lock(&ailp->xa_lock);
-                       if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
-                               /* xfs_trans_ail_delete() drops the AIL lock. */
-                               xfs_trans_ail_delete(ailp, &iip->ili_item,
-                                               stale ?
-                                                    SHUTDOWN_LOG_IO_ERROR :
+                       xfs_trans_ail_remove(&iip->ili_item,
+                                            stale ? SHUTDOWN_LOG_IO_ERROR :
                                                     SHUTDOWN_CORRUPT_INCORE);
-                       } else
-                               spin_unlock(&ailp->xa_lock);
                }
                iip->ili_logged = 0;
                /*
index 766b23f..8294132 100644 (file)
@@ -609,7 +609,7 @@ xfs_setattr_nonsize(
        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
        if (error)
-               goto out_dqrele;
+               goto out_trans_cancel;
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
 
@@ -640,7 +640,7 @@ xfs_setattr_nonsize(
                                                NULL, capable(CAP_FOWNER) ?
                                                XFS_QMOPT_FORCE_RES : 0);
                        if (error)      /* out of quota */
-                               goto out_trans_cancel;
+                               goto out_unlock;
                }
        }
 
@@ -729,10 +729,10 @@ xfs_setattr_nonsize(
 
        return 0;
 
+out_unlock:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
 out_trans_cancel:
        xfs_trans_cancel(tp);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_dqrele:
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
        return error;
index f41b0c3..930ebd8 100644 (file)
@@ -473,7 +473,8 @@ xfs_bulkstat(
                 * pending error, then we are done.
                 */
 del_cursor:
-               xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
+               xfs_btree_del_cursor(cur, error ?
+                                         XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
                xfs_buf_relse(agbp);
                if (error)
                        break;
index 08d4fe4..aaadee0 100644 (file)
@@ -668,9 +668,9 @@ xfs_log_mount(
                        ASSERT(0);
                        goto out_free_log;
                }
+               xfs_crit(mp, "Log size out of supported range.");
                xfs_crit(mp,
-"Log size out of supported range. Continuing onwards, but if log hangs are\n"
-"experienced then please report this message in the bug report.");
+"Continuing onwards, but if log hangs are experienced then please report this message in the bug report.");
        }
 
        /*
@@ -700,6 +700,7 @@ xfs_log_mount(
                if (error) {
                        xfs_warn(mp, "log mount/recovery failed: error %d",
                                error);
+                       xlog_recover_cancel(mp->m_log);
                        goto out_destroy_ail;
                }
        }
@@ -740,18 +741,35 @@ out:
  * it.
  */
 int
-xfs_log_mount_finish(xfs_mount_t *mp)
+xfs_log_mount_finish(
+       struct xfs_mount        *mp)
 {
        int     error = 0;
 
-       if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
-               error = xlog_recover_finish(mp->m_log);
-               if (!error)
-                       xfs_log_work_queue(mp);
-       } else {
+       if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
                ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
+               return 0;
        }
 
+       error = xlog_recover_finish(mp->m_log);
+       if (!error)
+               xfs_log_work_queue(mp);
+
+       return error;
+}
+
+/*
+ * The mount has failed. Cancel the recovery if it hasn't completed and destroy
+ * the log.
+ */
+int
+xfs_log_mount_cancel(
+       struct xfs_mount        *mp)
+{
+       int                     error;
+
+       error = xlog_recover_cancel(mp->m_log);
+       xfs_log_unmount(mp);
 
        return error;
 }
@@ -1142,11 +1160,13 @@ xlog_space_left(
                 * In this case we just want to return the size of the
                 * log as the amount of space left.
                 */
+               xfs_alert(log->l_mp, "xlog_space_left: head behind tail");
                xfs_alert(log->l_mp,
-                       "xlog_space_left: head behind tail\n"
-                       "  tail_cycle = %d, tail_bytes = %d\n"
-                       "  GH   cycle = %d, GH   bytes = %d",
-                       tail_cycle, tail_bytes, head_cycle, head_bytes);
+                         "  tail_cycle = %d, tail_bytes = %d",
+                         tail_cycle, tail_bytes);
+               xfs_alert(log->l_mp,
+                         "  GH   cycle = %d, GH   bytes = %d",
+                         head_cycle, head_bytes);
                ASSERT(0);
                free_bytes = log->l_logsize;
        }
@@ -1652,8 +1672,13 @@ xlog_cksum(
        if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
                union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
                int             i;
+               int             xheads;
+
+               xheads = size / XLOG_HEADER_CYCLE_SIZE;
+               if (size % XLOG_HEADER_CYCLE_SIZE)
+                       xheads++;
 
-               for (i = 1; i < log->l_iclog_heads; i++) {
+               for (i = 1; i < xheads; i++) {
                        crc = crc32c(crc, &xhdr[i].hic_xheader,
                                     sizeof(struct xlog_rec_ext_header));
                }
@@ -2028,26 +2053,24 @@ xlog_print_tic_res(
            "SWAPEXT"
        };
 
-       xfs_warn(mp,
-               "xlog_write: reservation summary:\n"
-               "  trans type  = %s (%u)\n"
-               "  unit res    = %d bytes\n"
-               "  current res = %d bytes\n"
-               "  total reg   = %u bytes (o/flow = %u bytes)\n"
-               "  ophdrs      = %u (ophdr space = %u bytes)\n"
-               "  ophdr + reg = %u bytes\n"
-               "  num regions = %u",
-               ((ticket->t_trans_type <= 0 ||
-                 ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
+       xfs_warn(mp, "xlog_write: reservation summary:");
+       xfs_warn(mp, "  trans type  = %s (%u)",
+                ((ticket->t_trans_type <= 0 ||
+                  ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
                  "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
-               ticket->t_trans_type,
-               ticket->t_unit_res,
-               ticket->t_curr_res,
-               ticket->t_res_arr_sum, ticket->t_res_o_flow,
-               ticket->t_res_num_ophdrs, ophdr_spc,
-               ticket->t_res_arr_sum +
-               ticket->t_res_o_flow + ophdr_spc,
-               ticket->t_res_num);
+                ticket->t_trans_type);
+       xfs_warn(mp, "  unit res    = %d bytes",
+                ticket->t_unit_res);
+       xfs_warn(mp, "  current res = %d bytes",
+                ticket->t_curr_res);
+       xfs_warn(mp, "  total reg   = %u bytes (o/flow = %u bytes)",
+                ticket->t_res_arr_sum, ticket->t_res_o_flow);
+       xfs_warn(mp, "  ophdrs      = %u (ophdr space = %u bytes)",
+                ticket->t_res_num_ophdrs, ophdr_spc);
+       xfs_warn(mp, "  ophdr + reg = %u bytes",
+                ticket->t_res_arr_sum + ticket->t_res_o_flow + ophdr_spc);
+       xfs_warn(mp, "  num regions = %u",
+                ticket->t_res_num);
 
        for (i = 0; i < ticket->t_res_num; i++) {
                uint r_type = ticket->t_res_arr[i].r_type;
index fa27aae..09d91d3 100644 (file)
@@ -147,6 +147,7 @@ int   xfs_log_mount(struct xfs_mount        *mp,
                        xfs_daddr_t             start_block,
                        int                     num_bblocks);
 int      xfs_log_mount_finish(struct xfs_mount *mp);
+int    xfs_log_mount_cancel(struct xfs_mount *);
 xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
 void     xfs_log_space_wake(struct xfs_mount *mp);
index abc2ccb..4e76493 100644 (file)
@@ -307,7 +307,13 @@ xlog_cil_insert_items(
                if (!(lidp->lid_flags & XFS_LID_DIRTY))
                        continue;
 
-               list_move_tail(&lip->li_cil, &cil->xc_cil);
+               /*
+                * Only move the item if it isn't already at the tail. This is
+                * to prevent a transient list_empty() state when reinserting
+                * an item that is already the only item in the CIL.
+                */
+               if (!list_is_last(&lip->li_cil, &cil->xc_cil))
+                       list_move_tail(&lip->li_cil, &cil->xc_cil);
        }
 
        /* account for space used by new iovec headers  */
index 1c87c8a..950f3f9 100644 (file)
@@ -426,6 +426,8 @@ xlog_recover(
 extern int
 xlog_recover_finish(
        struct xlog             *log);
+extern int
+xlog_recover_cancel(struct xlog *);
 
 extern __le32   xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
                            char *dp, int size);
index 480ebba..512a094 100644 (file)
@@ -1895,15 +1895,25 @@ xlog_recover_get_buf_lsn(
                 */
                goto recover_immediately;
        case XFS_SB_MAGIC:
+               /*
+                * superblock uuids are magic. We may or may not have a
+                * sb_meta_uuid on disk, but it will be set in the in-core
+                * superblock. We set the uuid pointer for verification
+                * according to the superblock feature mask to ensure we check
+                * the relevant UUID in the superblock.
+                */
                lsn = be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
-               uuid = &((struct xfs_dsb *)blk)->sb_uuid;
+               if (xfs_sb_version_hasmetauuid(&mp->m_sb))
+                       uuid = &((struct xfs_dsb *)blk)->sb_meta_uuid;
+               else
+                       uuid = &((struct xfs_dsb *)blk)->sb_uuid;
                break;
        default:
                break;
        }
 
        if (lsn != (xfs_lsn_t)-1) {
-               if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
+               if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
                        goto recover_immediately;
                return lsn;
        }
@@ -2933,16 +2943,16 @@ xlog_recover_efi_pass2(
        struct xlog_recover_item        *item,
        xfs_lsn_t                       lsn)
 {
-       int                     error;
-       xfs_mount_t             *mp = log->l_mp;
-       xfs_efi_log_item_t      *efip;
-       xfs_efi_log_format_t    *efi_formatp;
+       int                             error;
+       struct xfs_mount                *mp = log->l_mp;
+       struct xfs_efi_log_item         *efip;
+       struct xfs_efi_log_format       *efi_formatp;
 
        efi_formatp = item->ri_buf[0].i_addr;
 
        efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
-       if ((error = xfs_efi_copy_format(&(item->ri_buf[0]),
-                                        &(efip->efi_format)))) {
+       error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
+       if (error) {
                xfs_efi_item_free(efip);
                return error;
        }
@@ -2950,20 +2960,23 @@ xlog_recover_efi_pass2(
 
        spin_lock(&log->l_ailp->xa_lock);
        /*
-        * xfs_trans_ail_update() drops the AIL lock.
+        * The EFI has two references. One for the EFD and one for EFI to ensure
+        * it makes it into the AIL. Insert the EFI into the AIL directly and
+        * drop the EFI reference. Note that xfs_trans_ail_update() drops the
+        * AIL lock.
         */
        xfs_trans_ail_update(log->l_ailp, &efip->efi_item, lsn);
+       xfs_efi_release(efip);
        return 0;
 }
 
 
 /*
- * This routine is called when an efd format structure is found in
- * a committed transaction in the log.  It's purpose is to cancel
- * the corresponding efi if it was still in the log.  To do this
- * it searches the AIL for the efi with an id equal to that in the
- * efd format structure.  If we find it, we remove the efi from the
- * AIL and free it.
+ * This routine is called when an EFD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding EFI if it
+ * was still in the log. To do this it searches the AIL for the EFI with an id
+ * equal to that in the EFD format structure. If we find it we drop the EFD
+ * reference, which removes the EFI from the AIL and frees it.
  */
 STATIC int
 xlog_recover_efd_pass2(
@@ -2985,8 +2998,8 @@ xlog_recover_efd_pass2(
        efi_id = efd_formatp->efd_efi_id;
 
        /*
-        * Search for the efi with the id in the efd format structure
-        * in the AIL.
+        * Search for the EFI with the id in the EFD format structure in the
+        * AIL.
         */
        spin_lock(&ailp->xa_lock);
        lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
@@ -2995,18 +3008,18 @@ xlog_recover_efd_pass2(
                        efip = (xfs_efi_log_item_t *)lip;
                        if (efip->efi_format.efi_id == efi_id) {
                                /*
-                                * xfs_trans_ail_delete() drops the
-                                * AIL lock.
+                                * Drop the EFD reference to the EFI. This
+                                * removes the EFI from the AIL and frees it.
                                 */
-                               xfs_trans_ail_delete(ailp, lip,
-                                                    SHUTDOWN_CORRUPT_INCORE);
-                               xfs_efi_item_free(efip);
+                               spin_unlock(&ailp->xa_lock);
+                               xfs_efi_release(efip);
                                spin_lock(&ailp->xa_lock);
                                break;
                        }
                }
                lip = xfs_trans_ail_cursor_next(ailp, &cur);
        }
+
        xfs_trans_ail_cursor_done(&cur);
        spin_unlock(&ailp->xa_lock);
 
@@ -3034,6 +3047,11 @@ xlog_recover_do_icreate_pass2(
        unsigned int            count;
        unsigned int            isize;
        xfs_agblock_t           length;
+       int                     blks_per_cluster;
+       int                     bb_per_cluster;
+       int                     cancel_count;
+       int                     nbufs;
+       int                     i;
 
        icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
        if (icl->icl_type != XFS_LI_ICREATE) {
@@ -3092,22 +3110,45 @@ xlog_recover_do_icreate_pass2(
        }
 
        /*
-        * Inode buffers can be freed. Do not replay the inode initialisation as
-        * we could be overwriting something written after this inode buffer was
-        * cancelled.
+        * The icreate transaction can cover multiple cluster buffers and these
+        * buffers could have been freed and reused. Check the individual
+        * buffers for cancellation so we don't overwrite anything written after
+        * a cancellation.
+        */
+       blks_per_cluster = xfs_icluster_size_fsb(mp);
+       bb_per_cluster = XFS_FSB_TO_BB(mp, blks_per_cluster);
+       nbufs = length / blks_per_cluster;
+       for (i = 0, cancel_count = 0; i < nbufs; i++) {
+               xfs_daddr_t     daddr;
+
+               daddr = XFS_AGB_TO_DADDR(mp, agno,
+                                        agbno + i * blks_per_cluster);
+               if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
+                       cancel_count++;
+       }
+
+       /*
+        * We currently only use icreate for a single allocation at a time. This
+        * means we should expect either all or none of the buffers to be
+        * cancelled. Be conservative and skip replay if at least one buffer is
+        * cancelled, but warn the user that something is awry if the buffers
+        * are not consistent.
         *
-        * XXX: we need to iterate all buffers and only init those that are not
-        * cancelled. I think that a more fine grained factoring of
-        * xfs_ialloc_inode_init may be appropriate here to enable this to be
-        * done easily.
+        * XXX: This must be refined to only skip cancelled clusters once we use
+        * icreate for multiple chunk allocations.
         */
-       if (xlog_check_buffer_cancelled(log,
-                       XFS_AGB_TO_DADDR(mp, agno, agbno), length, 0))
+       ASSERT(!cancel_count || cancel_count == nbufs);
+       if (cancel_count) {
+               if (cancel_count != nbufs)
+                       xfs_warn(mp,
+       "WARNING: partial inode chunk cancellation, skipped icreate.");
+               trace_xfs_log_recover_icreate_cancel(log, icl);
                return 0;
+       }
 
-       xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno, length,
-                             be32_to_cpu(icl->icl_gen));
-       return 0;
+       trace_xfs_log_recover_icreate_recover(log, icl);
+       return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
+                                    length, be32_to_cpu(icl->icl_gen));
 }
 
 STATIC void
@@ -3385,14 +3426,24 @@ xlog_recover_add_to_cont_trans(
        char                    *ptr, *old_ptr;
        int                     old_len;
 
+       /*
+        * If the transaction is empty, the header was split across this and the
+        * previous record. Copy the rest of the header.
+        */
        if (list_empty(&trans->r_itemq)) {
-               /* finish copying rest of trans header */
+               ASSERT(len < sizeof(struct xfs_trans_header));
+               if (len > sizeof(struct xfs_trans_header)) {
+                       xfs_warn(log->l_mp, "%s: bad header length", __func__);
+                       return -EIO;
+               }
+
                xlog_recover_add_item(&trans->r_itemq);
                ptr = (char *)&trans->r_theader +
-                               sizeof(xfs_trans_header_t) - len;
+                               sizeof(struct xfs_trans_header) - len;
                memcpy(ptr, dp, len);
                return 0;
        }
+
        /* take the tail entry */
        item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
 
@@ -3441,7 +3492,19 @@ xlog_recover_add_to_trans(
                        ASSERT(0);
                        return -EIO;
                }
-               if (len == sizeof(xfs_trans_header_t))
+
+               if (len > sizeof(struct xfs_trans_header)) {
+                       xfs_warn(log->l_mp, "%s: bad header length", __func__);
+                       ASSERT(0);
+                       return -EIO;
+               }
+
+               /*
+                * The transaction header can be arbitrarily split across op
+                * records. If we don't have the whole thing here, copy what we
+                * do have and handle the rest in the next record.
+                */
+               if (len == sizeof(struct xfs_trans_header))
                        xlog_recover_add_item(&trans->r_itemq);
                memcpy(&trans->r_theader, dp, len);
                return 0;
@@ -3744,7 +3807,7 @@ xlog_recover_process_efi(
                         * free the memory associated with it.
                         */
                        set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
-                       xfs_efi_release(efip, efip->efi_format.efi_nextents);
+                       xfs_efi_release(efip);
                        return -EIO;
                }
        }
@@ -3757,11 +3820,11 @@ xlog_recover_process_efi(
 
        for (i = 0; i < efip->efi_format.efi_nextents; i++) {
                extp = &(efip->efi_format.efi_extents[i]);
-               error = xfs_free_extent(tp, extp->ext_start, extp->ext_len);
+               error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
+                                             extp->ext_len);
                if (error)
                        goto abort_error;
-               xfs_trans_log_efd_extent(tp, efdp, extp->ext_start,
-                                        extp->ext_len);
+
        }
 
        set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
@@ -3793,10 +3856,10 @@ abort_error:
  */
 STATIC int
 xlog_recover_process_efis(
-       struct xlog     *log)
+       struct xlog             *log)
 {
-       xfs_log_item_t          *lip;
-       xfs_efi_log_item_t      *efip;
+       struct xfs_log_item     *lip;
+       struct xfs_efi_log_item *efip;
        int                     error = 0;
        struct xfs_ail_cursor   cur;
        struct xfs_ail          *ailp;
@@ -3820,7 +3883,7 @@ xlog_recover_process_efis(
                /*
                 * Skip EFIs that we've already processed.
                 */
-               efip = (xfs_efi_log_item_t *)lip;
+               efip = container_of(lip, struct xfs_efi_log_item, efi_item);
                if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
                        lip = xfs_trans_ail_cursor_next(ailp, &cur);
                        continue;
@@ -3839,6 +3902,50 @@ out:
        return error;
 }
 
+/*
+ * A cancel occurs when the mount has failed and we're bailing out. Release all
+ * pending EFIs so they don't pin the AIL.
+ */
+STATIC int
+xlog_recover_cancel_efis(
+       struct xlog             *log)
+{
+       struct xfs_log_item     *lip;
+       struct xfs_efi_log_item *efip;
+       int                     error = 0;
+       struct xfs_ail_cursor   cur;
+       struct xfs_ail          *ailp;
+
+       ailp = log->l_ailp;
+       spin_lock(&ailp->xa_lock);
+       lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+       while (lip != NULL) {
+               /*
+                * We're done when we see something other than an EFI.
+                * There should be no EFIs left in the AIL now.
+                */
+               if (lip->li_type != XFS_LI_EFI) {
+#ifdef DEBUG
+                       for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
+                               ASSERT(lip->li_type != XFS_LI_EFI);
+#endif
+                       break;
+               }
+
+               efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+
+               spin_unlock(&ailp->xa_lock);
+               xfs_efi_release(efip);
+               spin_lock(&ailp->xa_lock);
+
+               lip = xfs_trans_ail_cursor_next(ailp, &cur);
+       }
+
+       xfs_trans_ail_cursor_done(&cur);
+       spin_unlock(&ailp->xa_lock);
+       return error;
+}
+
 /*
  * This routine performs a transaction to null out a bad inode pointer
  * in an agi unlinked inode hash bucket.
@@ -4532,11 +4639,13 @@ xlog_recover(
                    xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb,
                                        XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) {
                        xfs_warn(log->l_mp,
-"Superblock has unknown incompatible log features (0x%x) enabled.\n"
-"The log can not be fully and/or safely recovered by this kernel.\n"
-"Please recover the log on a kernel that supports the unknown features.",
+"Superblock has unknown incompatible log features (0x%x) enabled.",
                                (log->l_mp->m_sb.sb_features_log_incompat &
                                        XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN));
+                       xfs_warn(log->l_mp,
+"The log can not be fully and/or safely recovered by this kernel.");
+                       xfs_warn(log->l_mp,
+"Please recover the log on a kernel that supports the unknown features.");
                        return -EINVAL;
                }
 
@@ -4612,6 +4721,17 @@ xlog_recover_finish(
        return 0;
 }
 
+int
+xlog_recover_cancel(
+       struct xlog     *log)
+{
+       int             error = 0;
+
+       if (log->l_flags & XLOG_RECOVERY_NEEDED)
+               error = xlog_recover_cancel_efis(log);
+
+       return error;
+}
 
 #if defined(DEBUG)
 /*
index 461e791..bf92e0c 100644 (file)
@@ -615,14 +615,14 @@ xfs_default_resblks(xfs_mount_t *mp)
  */
 int
 xfs_mountfs(
-       xfs_mount_t     *mp)
+       struct xfs_mount        *mp)
 {
-       xfs_sb_t        *sbp = &(mp->m_sb);
-       xfs_inode_t     *rip;
-       __uint64_t      resblks;
-       uint            quotamount = 0;
-       uint            quotaflags = 0;
-       int             error = 0;
+       struct xfs_sb           *sbp = &(mp->m_sb);
+       struct xfs_inode        *rip;
+       __uint64_t              resblks;
+       uint                    quotamount = 0;
+       uint                    quotaflags = 0;
+       int                     error = 0;
 
        xfs_sb_mount_common(mp, sbp);
 
@@ -799,7 +799,9 @@ xfs_mountfs(
        }
 
        /*
-        * log's mount-time initialization. Perform 1st part recovery if needed
+        * Log's mount-time initialization. The first part of recovery can place
+        * some items on the AIL, to be handled when recovery is finished or
+        * cancelled.
         */
        error = xfs_log_mount(mp, mp->m_logdev_targp,
                              XFS_FSB_TO_DADDR(mp, sbp->sb_logstart),
@@ -910,9 +912,9 @@ xfs_mountfs(
        }
 
        /*
-        * Finish recovering the file system.  This part needed to be
-        * delayed until after the root and real-time bitmap inodes
-        * were consistently read in.
+        * Finish recovering the file system.  This part needed to be delayed
+        * until after the root and real-time bitmap inodes were consistently
+        * read in.
         */
        error = xfs_log_mount_finish(mp);
        if (error) {
@@ -955,8 +957,10 @@ xfs_mountfs(
        xfs_rtunmount_inodes(mp);
  out_rele_rip:
        IRELE(rip);
+       cancel_delayed_work_sync(&mp->m_reclaim_work);
+       xfs_reclaim_inodes(mp, SYNC_WAIT);
  out_log_dealloc:
-       xfs_log_unmount(mp);
+       xfs_log_mount_cancel(mp);
  out_fail_wait:
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
                xfs_wait_buftarg(mp->m_logdev_targp);
index f4e8c06..ab1bac6 100644 (file)
@@ -757,31 +757,30 @@ xfs_rtallocate_extent_size(
 /*
  * Allocate space to the bitmap or summary file, and zero it, for growfs.
  */
-STATIC int                             /* error */
+STATIC int
 xfs_growfs_rt_alloc(
-       xfs_mount_t     *mp,            /* file system mount point */
-       xfs_extlen_t    oblocks,        /* old count of blocks */
-       xfs_extlen_t    nblocks,        /* new count of blocks */
-       xfs_inode_t     *ip)            /* inode (bitmap/summary) */
+       struct xfs_mount        *mp,            /* file system mount point */
+       xfs_extlen_t            oblocks,        /* old count of blocks */
+       xfs_extlen_t            nblocks,        /* new count of blocks */
+       struct xfs_inode        *ip)            /* inode (bitmap/summary) */
 {
-       xfs_fileoff_t   bno;            /* block number in file */
-       xfs_buf_t       *bp;            /* temporary buffer for zeroing */
-       int             committed;      /* transaction committed flag */
-       xfs_daddr_t     d;              /* disk block address */
-       int             error;          /* error return value */
-       xfs_fsblock_t   firstblock;     /* first block allocated in xaction */
-       xfs_bmap_free_t flist;          /* list of freed blocks */
-       xfs_fsblock_t   fsbno;          /* filesystem block for bno */
-       xfs_bmbt_irec_t map;            /* block map output */
-       int             nmap;           /* number of block maps */
-       int             resblks;        /* space reservation */
+       xfs_fileoff_t           bno;            /* block number in file */
+       struct xfs_buf          *bp;    /* temporary buffer for zeroing */
+       int                     committed;      /* transaction committed flag */
+       xfs_daddr_t             d;              /* disk block address */
+       int                     error;          /* error return value */
+       xfs_fsblock_t           firstblock;/* first block allocated in xaction */
+       struct xfs_bmap_free    flist;          /* list of freed blocks */
+       xfs_fsblock_t           fsbno;          /* filesystem block for bno */
+       struct xfs_bmbt_irec    map;            /* block map output */
+       int                     nmap;           /* number of block maps */
+       int                     resblks;        /* space reservation */
+       struct xfs_trans        *tp;
 
        /*
         * Allocate space to the file, as necessary.
         */
        while (oblocks < nblocks) {
-               xfs_trans_t     *tp;
-
                tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC);
                resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);
                /*
@@ -790,7 +789,7 @@ xfs_growfs_rt_alloc(
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
                                          resblks, 0);
                if (error)
-                       goto error_cancel;
+                       goto out_trans_cancel;
                /*
                 * Lock the inode.
                 */
@@ -808,16 +807,16 @@ xfs_growfs_rt_alloc(
                if (!error && nmap < 1)
                        error = -ENOSPC;
                if (error)
-                       goto error_cancel;
+                       goto out_bmap_cancel;
                /*
                 * Free any blocks freed up in the transaction, then commit.
                 */
                error = xfs_bmap_finish(&tp, &flist, &committed);
                if (error)
-                       goto error_cancel;
+                       goto out_bmap_cancel;
                error = xfs_trans_commit(tp);
                if (error)
-                       goto error;
+                       return error;
                /*
                 * Now we need to clear the allocated blocks.
                 * Do this one block per transaction, to keep it simple.
@@ -832,7 +831,7 @@ xfs_growfs_rt_alloc(
                        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero,
                                                  0, 0);
                        if (error)
-                               goto error_cancel;
+                               goto out_trans_cancel;
                        /*
                         * Lock the bitmap inode.
                         */
@@ -846,9 +845,7 @@ xfs_growfs_rt_alloc(
                                mp->m_bsize, 0);
                        if (bp == NULL) {
                                error = -EIO;
-error_cancel:
-                               xfs_trans_cancel(tp);
-                               goto error;
+                               goto out_trans_cancel;
                        }
                        memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
                        xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
@@ -857,16 +854,20 @@ error_cancel:
                         */
                        error = xfs_trans_commit(tp);
                        if (error)
-                               goto error;
+                               return error;
                }
                /*
                 * Go on to the next extent, if any.
                 */
                oblocks = map.br_startoff + map.br_blockcount;
        }
+
        return 0;
 
-error:
+out_bmap_cancel:
+       xfs_bmap_cancel(&flist);
+out_trans_cancel:
+       xfs_trans_cancel(tp);
        return error;
 }
 
index bbd9b1f..904f637 100644 (file)
@@ -261,16 +261,8 @@ xfs_parseargs(
                        mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
                        if (!mp->m_rtname)
                                return -ENOMEM;
-               } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
-                       if (!value || !*value) {
-                               xfs_warn(mp, "%s option requires an argument",
-                                       this_char);
-                               return -EINVAL;
-                       }
-                       if (kstrtoint(value, 10, &iosize))
-                               return -EINVAL;
-                       iosizelog = ffs(iosize) - 1;
-               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
+               } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE) ||
+                          !strcmp(this_char, MNTOPT_BIOSIZE)) {
                        if (!value || !*value) {
                                xfs_warn(mp, "%s option requires an argument",
                                        this_char);
@@ -1528,6 +1520,10 @@ xfs_fs_fill_super(
                }
        }
 
+       if (xfs_sb_version_hassparseinodes(&mp->m_sb))
+               xfs_alert(mp,
+       "EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
+
        error = xfs_mountfs(mp);
        if (error)
                goto out_filestream_unmount;
index 4be27b0..996481e 100644 (file)
@@ -240,7 +240,8 @@ xfs_symlink(
        if (error)
                goto out_trans_cancel;
 
-       xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+       xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
+                     XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
        unlock_dp_on_error = true;
 
        /*
@@ -288,7 +289,7 @@ xfs_symlink(
         * the transaction cancel unlocking dp so don't do it explicitly in the
         * error path.
         */
-       xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        unlock_dp_on_error = false;
 
        /*
@@ -421,7 +422,7 @@ out_release_inode:
        xfs_qm_dqrele(pdqp);
 
        if (unlock_dp_on_error)
-               xfs_iunlock(dp, XFS_ILOCK_EXCL);
+               xfs_iunlock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
        return error;
 }
 
@@ -501,7 +502,7 @@ xfs_inactive_symlink_rmt(
        /*
         * Unmap the dead block(s) to the free_list.
         */
-       error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps,
+       error = xfs_bunmapi(tp, ip, 0, size, 0, nmaps,
                            &first_block, &free_list, &done);
        if (error)
                goto error_bmap_cancel;
index 8d916d3..9aeeb21 100644 (file)
@@ -2089,6 +2089,40 @@ DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
 DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
 DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
 
+DECLARE_EVENT_CLASS(xfs_log_recover_icreate_item_class,
+       TP_PROTO(struct xlog *log, struct xfs_icreate_log *in_f),
+       TP_ARGS(log, in_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agblock_t, agbno)
+               __field(unsigned int, count)
+               __field(unsigned int, isize)
+               __field(xfs_agblock_t, length)
+               __field(unsigned int, gen)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->agno = be32_to_cpu(in_f->icl_ag);
+               __entry->agbno = be32_to_cpu(in_f->icl_agbno);
+               __entry->count = be32_to_cpu(in_f->icl_count);
+               __entry->isize = be32_to_cpu(in_f->icl_isize);
+               __entry->length = be32_to_cpu(in_f->icl_length);
+               __entry->gen = be32_to_cpu(in_f->icl_gen);
+       ),
+       TP_printk("dev %d:%d agno %u agbno %u count %u isize %u length %u "
+                 "gen %u", MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno, __entry->agbno, __entry->count, __entry->isize,
+                 __entry->length, __entry->gen)
+)
+#define DEFINE_LOG_RECOVER_ICREATE_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_icreate_item_class, name, \
+       TP_PROTO(struct xlog *log, struct xfs_icreate_log *in_f), \
+       TP_ARGS(log, in_f))
+
+DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_cancel);
+DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_recover);
+
 DECLARE_EVENT_CLASS(xfs_discard_class,
        TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
                 xfs_agblock_t agbno, xfs_extlen_t len),
index 0582a27..a0ab1da 100644 (file)
@@ -1019,9 +1019,10 @@ xfs_trans_cancel(
  * chunk we've been working on and get a new transaction to continue.
  */
 int
-xfs_trans_roll(
+__xfs_trans_roll(
        struct xfs_trans        **tpp,
-       struct xfs_inode        *dp)
+       struct xfs_inode        *dp,
+       int                     *committed)
 {
        struct xfs_trans        *trans;
        struct xfs_trans_res    tres;
@@ -1052,6 +1053,7 @@ xfs_trans_roll(
        if (error)
                return error;
 
+       *committed = 1;
        trans = *tpp;
 
        /*
@@ -1074,3 +1076,12 @@ xfs_trans_roll(
                xfs_trans_ijoin(trans, dp, 0);
        return 0;
 }
+
+int
+xfs_trans_roll(
+       struct xfs_trans        **tpp,
+       struct xfs_inode        *dp)
+{
+       int                     committed = 0;
+       return __xfs_trans_roll(tpp, dp, &committed);
+}
index 3b21b4e..4643070 100644 (file)
@@ -213,7 +213,6 @@ void                xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
 void           xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
 void           xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
 struct xfs_efi_log_item        *xfs_trans_get_efi(xfs_trans_t *, uint);
-void           xfs_efi_release(struct xfs_efi_log_item *, uint);
 void           xfs_trans_log_efi_extent(xfs_trans_t *,
                                         struct xfs_efi_log_item *,
                                         xfs_fsblock_t,
@@ -221,11 +220,11 @@ void              xfs_trans_log_efi_extent(xfs_trans_t *,
 struct xfs_efd_log_item        *xfs_trans_get_efd(xfs_trans_t *,
                                  struct xfs_efi_log_item *,
                                  uint);
-void           xfs_trans_log_efd_extent(xfs_trans_t *,
-                                        struct xfs_efd_log_item *,
-                                        xfs_fsblock_t,
-                                        xfs_extlen_t);
+int            xfs_trans_free_extent(struct xfs_trans *,
+                                     struct xfs_efd_log_item *, xfs_fsblock_t,
+                                     xfs_extlen_t);
 int            xfs_trans_commit(struct xfs_trans *);
+int            __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *);
 int            xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
 void           xfs_trans_cancel(xfs_trans_t *);
 int            xfs_trans_ail_init(struct xfs_mount *);
index 284397d..a96ae54 100644 (file)
@@ -25,6 +25,7 @@
 #include "xfs_trans.h"
 #include "xfs_trans_priv.h"
 #include "xfs_extfree_item.h"
+#include "xfs_alloc.h"
 
 /*
  * This routine is called to allocate an "extent free intention"
@@ -108,19 +109,30 @@ xfs_trans_get_efd(xfs_trans_t             *tp,
 }
 
 /*
- * This routine is called to indicate that the described
- * extent is to be logged as having been freed.  It should
- * be called once for each extent freed.
+ * Free an extent and log it to the EFD. Note that the transaction is marked
+ * dirty regardless of whether the extent free succeeds or fails to support the
+ * EFI/EFD lifecycle rules.
  */
-void
-xfs_trans_log_efd_extent(xfs_trans_t           *tp,
-                        xfs_efd_log_item_t     *efdp,
-                        xfs_fsblock_t          start_block,
-                        xfs_extlen_t           ext_len)
+int
+xfs_trans_free_extent(
+       struct xfs_trans        *tp,
+       struct xfs_efd_log_item *efdp,
+       xfs_fsblock_t           start_block,
+       xfs_extlen_t            ext_len)
 {
        uint                    next_extent;
-       xfs_extent_t            *extp;
+       struct xfs_extent       *extp;
+       int                     error;
 
+       error = xfs_free_extent(tp, start_block, ext_len);
+
+       /*
+        * Mark the transaction dirty, even on error. This ensures the
+        * transaction is aborted, which:
+        *
+        * 1.) releases the EFI and frees the EFD
+        * 2.) shuts down the filesystem
+        */
        tp->t_flags |= XFS_TRANS_DIRTY;
        efdp->efd_item.li_desc->lid_flags |= XFS_LID_DIRTY;
 
@@ -130,4 +142,6 @@ xfs_trans_log_efd_extent(xfs_trans_t                *tp,
        extp->ext_start = start_block;
        extp->ext_len = ext_len;
        efdp->efd_next_extent++;
+
+       return error;
 }
index 1b73629..49931b7 100644 (file)
@@ -119,6 +119,21 @@ xfs_trans_ail_delete(
        xfs_trans_ail_delete_bulk(ailp, &lip, 1, shutdown_type);
 }
 
+static inline void
+xfs_trans_ail_remove(
+       struct xfs_log_item     *lip,
+       int                     shutdown_type)
+{
+       struct xfs_ail          *ailp = lip->li_ailp;
+
+       spin_lock(&ailp->xa_lock);
+       /* xfs_trans_ail_delete() drops the AIL lock */
+       if (lip->li_flags & XFS_LI_IN_AIL)
+               xfs_trans_ail_delete(ailp, lip, shutdown_type);
+       else
+               spin_unlock(&ailp->xa_lock);
+}
+
 void                   xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
 void                   xfs_ail_push_all(struct xfs_ail *);
 void                   xfs_ail_push_all_sync(struct xfs_ail *);