Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
Author:     Linus Torvalds <torvalds@linux-foundation.org>
AuthorDate: Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (36 commits)
  xfs: semaphore cleanup
  xfs: Extend project quotas to support 32bit project ids
  xfs: remove xfs_buf wrappers
  xfs: remove xfs_cred.h
  xfs: remove xfs_globals.h
  xfs: remove xfs_version.h
  xfs: remove xfs_refcache.h
  xfs: fix the xfs_trans_committed
  xfs: remove unused t_callback field in struct xfs_trans
  xfs: fix bogus m_maxagi check in xfs_iget
  xfs: do not use xfs_mod_incore_sb_batch for per-cpu counters
  xfs: do not use xfs_mod_incore_sb for per-cpu counters
  xfs: remove XFS_MOUNT_NO_PERCPU_SB
  xfs: pack xfs_buf structure more tightly
  xfs: convert buffer cache hash to rbtree
  xfs: serialise inode reclaim within an AG
  xfs: batch inode reclaim lookup
  xfs: implement batched inode lookups for AG walking
  xfs: split out inode walk inode grabbing
  xfs: split inode AG walking into separate code for reclaim
  ...

fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_buf.h
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/linux-2.6/xfs_trace.h
fs/xfs/xfs_log.c

diff --combined fs/xfs/linux-2.6/xfs_buf.c
@@@ -188,8 -188,8 +188,8 @@@ _xfs_buf_initialize
        atomic_set(&bp->b_hold, 1);
        init_completion(&bp->b_iowait);
        INIT_LIST_HEAD(&bp->b_list);
-       INIT_LIST_HEAD(&bp->b_hash_list);
-       init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+       RB_CLEAR_NODE(&bp->b_rbnode);
+       sema_init(&bp->b_sema, 0); /* held, no waiters */
        XB_SET_OWNER(bp);
        bp->b_target = target;
        bp->b_file_offset = range_base;
@@@ -262,8 -262,6 +262,6 @@@ xfs_buf_free
  {
        trace_xfs_buf_free(bp, _RET_IP_);
  
-       ASSERT(list_empty(&bp->b_hash_list));
        if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
                uint            i;
  
@@@ -422,8 -420,10 +420,10 @@@ _xfs_buf_find
  {
        xfs_off_t               range_base;
        size_t                  range_length;
-       xfs_bufhash_t           *hash;
-       xfs_buf_t               *bp, *n;
+       struct xfs_perag        *pag;
+       struct rb_node          **rbp;
+       struct rb_node          *parent;
+       xfs_buf_t               *bp;
  
        range_base = (ioff << BBSHIFT);
        range_length = (isize << BBSHIFT);
        ASSERT(!(range_length < (1 << btp->bt_sshift)));
        ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
  
-       hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-       spin_lock(&hash->bh_lock);
-       list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-               ASSERT(btp == bp->b_target);
-               if (bp->b_file_offset == range_base &&
-                   bp->b_buffer_length == range_length) {
+       /* get tree root */
+       pag = xfs_perag_get(btp->bt_mount,
+                               xfs_daddr_to_agno(btp->bt_mount, ioff));
+       /* walk tree */
+       spin_lock(&pag->pag_buf_lock);
+       rbp = &pag->pag_buf_tree.rb_node;
+       parent = NULL;
+       bp = NULL;
+       while (*rbp) {
+               parent = *rbp;
+               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+               if (range_base < bp->b_file_offset)
+                       rbp = &(*rbp)->rb_left;
+               else if (range_base > bp->b_file_offset)
+                       rbp = &(*rbp)->rb_right;
+               else {
+                       /*
+                        * found a block offset match. If the range doesn't
+                        * match, the only way this is allowed is if the buffer
+                        * in the cache is stale and the transaction that made
+                        * it stale has not yet committed. i.e. we are
+                        * reallocating a busy extent. Skip this buffer and
+                        * continue searching to the right for an exact match.
+                        */
+                       if (bp->b_buffer_length != range_length) {
+                               ASSERT(bp->b_flags & XBF_STALE);
+                               rbp = &(*rbp)->rb_right;
+                               continue;
+                       }
                        atomic_inc(&bp->b_hold);
                        goto found;
                }
        if (new_bp) {
                _xfs_buf_initialize(new_bp, btp, range_base,
                                range_length, flags);
-               new_bp->b_hash = hash;
-               list_add(&new_bp->b_hash_list, &hash->bh_list);
+               rb_link_node(&new_bp->b_rbnode, parent, rbp);
+               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+               /* the buffer keeps the perag reference until it is freed */
+               new_bp->b_pag = pag;
+               spin_unlock(&pag->pag_buf_lock);
        } else {
                XFS_STATS_INC(xb_miss_locked);
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
        }
-       spin_unlock(&hash->bh_lock);
        return new_bp;
  
  found:
-       spin_unlock(&hash->bh_lock);
+       spin_unlock(&pag->pag_buf_lock);
+       xfs_perag_put(pag);
  
        /* Attempt to get the semaphore without sleeping,
         * if this does not work then we need to drop the
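
The walk above is the canonical rbtree find-or-insert descent, with one
XFS-specific twist: a stale buffer from an uncommitted transaction can
leave a duplicate block offset in the tree, so an offset match with the
wrong length keeps walking right instead of stopping. Reduced to its
skeleton (struct node, key and find_or_link are illustrative names, not
XFS code), the descent looks like this:

	#include <linux/rbtree.h>

	struct node {
		struct rb_node	rbnode;
		unsigned long	key;
	};

	static struct node *
	find_or_link(struct rb_root *root, struct node *new)
	{
		struct rb_node	**rbp = &root->rb_node;
		struct rb_node	*parent = NULL;

		while (*rbp) {
			struct node	*n;

			parent = *rbp;
			n = rb_entry(parent, struct node, rbnode);
			if (new->key < n->key)
				rbp = &(*rbp)->rb_left;
			else if (new->key > n->key)
				rbp = &(*rbp)->rb_right;
			else
				return n;	/* existing entry wins */
		}
		rb_link_node(&new->rbnode, parent, rbp);	/* link at leaf */
		rb_insert_color(&new->rbnode, root);		/* rebalance */
		return new;
	}
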
@@@ -625,8 -652,7 +652,7 @@@ void
  xfs_buf_readahead(
        xfs_buftarg_t           *target,
        xfs_off_t               ioff,
-       size_t                  isize,
-       xfs_buf_flags_t         flags)
+       size_t                  isize)
  {
        struct backing_dev_info *bdi;
  
        if (bdi_read_congested(bdi))
                return;
  
-       flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
-       xfs_buf_read(target, ioff, isize, flags);
+       xfs_buf_read(target, ioff, isize,
+                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+ }
+ /*
+  * Read an uncached buffer from disk. Allocates and returns a locked
+  * buffer containing the disk contents or nothing.
+  */
+ struct xfs_buf *
+ xfs_buf_read_uncached(
+       struct xfs_mount        *mp,
+       struct xfs_buftarg      *target,
+       xfs_daddr_t             daddr,
+       size_t                  length,
+       int                     flags)
+ {
+       xfs_buf_t               *bp;
+       int                     error;
+ 
+       bp = xfs_buf_get_uncached(target, length, flags);
+       if (!bp)
+               return NULL;
+ 
+       /* set up the buffer for a read IO */
+       xfs_buf_lock(bp);
+       XFS_BUF_SET_ADDR(bp, daddr);
+       XFS_BUF_READ(bp);
+       XFS_BUF_BUSY(bp);
+       xfsbdstrat(mp, bp);
+       error = xfs_buf_iowait(bp);
+       if (error || bp->b_error) {
+               xfs_buf_relse(bp);
+               return NULL;
+       }
+       return bp;
  }
  
  xfs_buf_t *
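
Since xfs_buf_read_uncached() hands back a locked buffer that was never
inserted into the per-AG rbtree, callers release it with xfs_buf_relse()
rather than letting cache lookups find it. A hedged usage sketch (the
sector-sized superblock read mirrors how this series uses the helper;
sector_size is an assumed local, and length is in bytes as with
xfs_buf_get_uncached()):

	struct xfs_buf	*bp;

	bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
				   XFS_SB_DADDR, sector_size, 0);
	if (!bp)
		return EIO;		/* allocation or I/O failure */
	/* ... parse bp->b_addr ... */
	xfs_buf_relse(bp);		/* unlock and drop the hold */
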
@@@ -707,9 -767,10 +767,10 @@@ xfs_buf_associate_memory
  }
  
  xfs_buf_t *
- xfs_buf_get_noaddr(
+ xfs_buf_get_uncached(
+       struct xfs_buftarg      *target,
        size_t                  len,
-       xfs_buftarg_t           *target)
+       int                     flags)
  {
        unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
        int                     error, i;
                goto fail_free_buf;
  
        for (i = 0; i < page_count; i++) {
-               bp->b_pages[i] = alloc_page(GFP_KERNEL);
+               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
                if (!bp->b_pages[i])
                        goto fail_free_mem;
        }
  
        xfs_buf_unlock(bp);
  
-       trace_xfs_buf_get_noaddr(bp, _RET_IP_);
+       trace_xfs_buf_get_uncached(bp, _RET_IP_);
        return bp;
  
   fail_free_mem:
@@@ -774,29 -835,30 +835,30 @@@ void
  xfs_buf_rele(
        xfs_buf_t               *bp)
  {
-       xfs_bufhash_t           *hash = bp->b_hash;
+       struct xfs_perag        *pag = bp->b_pag;
  
        trace_xfs_buf_rele(bp, _RET_IP_);
  
-       if (unlikely(!hash)) {
+       if (!pag) {
                ASSERT(!bp->b_relse);
+               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
                if (atomic_dec_and_test(&bp->b_hold))
                        xfs_buf_free(bp);
                return;
        }
  
+       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
        ASSERT(atomic_read(&bp->b_hold) > 0);
-       if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
                if (bp->b_relse) {
                        atomic_inc(&bp->b_hold);
-                       spin_unlock(&hash->bh_lock);
-                       (*(bp->b_relse)) (bp);
-               } else if (bp->b_flags & XBF_FS_MANAGED) {
-                       spin_unlock(&hash->bh_lock);
+                       spin_unlock(&pag->pag_buf_lock);
+                       bp->b_relse(bp);
                } else {
                        ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                       list_del_init(&bp->b_hash_list);
-                       spin_unlock(&hash->bh_lock);
+                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+                       spin_unlock(&pag->pag_buf_lock);
+                       xfs_perag_put(pag);
                        xfs_buf_free(bp);
                }
        }
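
The release path above leans on atomic_dec_and_lock(): the common,
non-final release stays lock-free, and pag_buf_lock is taken only when
the hold count would reach zero, so the rb_erase() cannot race with a
lookup that is about to take a new reference under the same lock. The
general idiom, sketched outside XFS with hypothetical names:

	#include <linux/spinlock.h>
	#include <linux/slab.h>

	struct obj {
		atomic_t	refcount;
		/* search-structure linkage protected by the caller's lock */
	};

	static void
	obj_put(struct obj *o, spinlock_t *lock)
	{
		/*
		 * atomic_dec_and_lock() returns true, with 'lock' held,
		 * only when the decrement hit zero; any concurrent lookup
		 * must bump the count under the same lock.
		 */
		if (!atomic_dec_and_lock(&o->refcount, lock))
			return;
		/* unlink from the search structure here */
		spin_unlock(lock);
		kfree(o);
	}
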
@@@ -859,7 -921,7 +921,7 @@@ xfs_buf_lock
        trace_xfs_buf_lock(bp, _RET_IP_);
  
        if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_mount, 0);
+               xfs_log_force(bp->b_target->bt_mount, 0);
        if (atomic_read(&bp->b_io_remaining))
                blk_run_address_space(bp->b_target->bt_mapping);
        down(&bp->b_sema);
@@@ -924,7 -986,19 +986,7 @@@ xfs_buf_iodone_work
        xfs_buf_t               *bp =
                container_of(work, xfs_buf_t, b_iodone_work);
  
 -      /*
 -       * We can get an EOPNOTSUPP to ordered writes.  Here we clear the
 -       * ordered flag and reissue them.  Because we can't tell the higher
 -       * layers directly that they should not issue ordered I/O anymore, they
 -       * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
 -       */
 -      if ((bp->b_error == EOPNOTSUPP) &&
 -          (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
 -              trace_xfs_buf_ordered_retry(bp, _RET_IP_);
 -              bp->b_flags &= ~XBF_ORDERED;
 -              bp->b_flags |= _XFS_BARRIER_FAILED;
 -              xfs_buf_iorequest(bp);
 -      } else if (bp->b_iodone)
 +      if (bp->b_iodone)
                (*(bp->b_iodone))(bp);
        else if (bp->b_flags & XBF_ASYNC)
                xfs_buf_relse(bp);
@@@ -970,7 -1044,6 +1032,6 @@@ xfs_bwrite
  {
        int                     error;
  
-       bp->b_mount = mp;
        bp->b_flags |= XBF_WRITE;
        bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
  
@@@ -991,8 -1064,6 +1052,6 @@@ xfs_bdwrite
  {
        trace_xfs_buf_bdwrite(bp, _RET_IP_);
  
-       bp->b_mount = mp;
        bp->b_flags &= ~XBF_READ;
        bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
  
  
  /*
   * Called when we want to stop a buffer from getting written or read.
-  * We attach the EIO error, muck with its flags, and call biodone
+  * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
   * so that the proper iodone callbacks get called.
   */
  STATIC int
@@@ -1018,21 -1089,21 +1077,21 @@@ xfs_bioerror
        XFS_BUF_ERROR(bp, EIO);
  
        /*
-        * We're calling biodone, so delete XBF_DONE flag.
+        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
         */
        XFS_BUF_UNREAD(bp);
        XFS_BUF_UNDELAYWRITE(bp);
        XFS_BUF_UNDONE(bp);
        XFS_BUF_STALE(bp);
  
-       xfs_biodone(bp);
+       xfs_buf_ioend(bp, 0);
  
        return EIO;
  }
  
  /*
   * Same as xfs_bioerror, except that we are releasing the buffer
-  * here ourselves, and avoiding the biodone call.
+  * here ourselves, and avoiding the xfs_buf_ioend call.
   * This is meant for userdata errors; metadata bufs come with
   * iodone functions attached, so that we can track down errors.
   */
@@@ -1081,7 -1152,7 +1140,7 @@@ int
  xfs_bdstrat_cb(
        struct xfs_buf  *bp)
  {
-       if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
                trace_xfs_bdstrat_shut(bp, _RET_IP_);
                /*
                 * Metadata write that didn't get logged but
@@@ -1183,7 -1254,7 +1242,7 @@@ _xfs_buf_ioapply
  
        if (bp->b_flags & XBF_ORDERED) {
                ASSERT(!(bp->b_flags & XBF_READ));
 -              rw = WRITE_BARRIER;
 +              rw = WRITE_FLUSH_FUA;
        } else if (bp->b_flags & XBF_LOG_BUFFER) {
                ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
                bp->b_flags &= ~_XBF_RUN_QUEUES;
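
The XBF_ORDERED branch tracks the 2.6.37 removal of block-layer
barriers: WRITE_BARRIER is gone, and ordered buffers are issued as a
cache flush plus a forced-unit-access write instead of a queue-draining
barrier. Roughly (flag expansion paraphrased from that era's headers,
using the old two-argument submit_bio()):

	/*
	 * WRITE_FLUSH_FUA ~ WRITE | REQ_FLUSH | REQ_FUA: flush the
	 * volatile device cache before the write, and make the write
	 * itself durable, without stalling unrelated queued I/O the
	 * way WRITE_BARRIER did.
	 */
	submit_bio(WRITE_FLUSH_FUA, bio);
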
@@@ -1387,62 -1458,24 +1446,24 @@@ xfs_buf_iomove
   */
  void
  xfs_wait_buftarg(
-       xfs_buftarg_t   *btp)
- {
-       xfs_buf_t       *bp, *n;
-       xfs_bufhash_t   *hash;
-       uint            i;
-       for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-               hash = &btp->bt_hash[i];
- again:
-               spin_lock(&hash->bh_lock);
-               list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-                       ASSERT(btp == bp->b_target);
-                       if (!(bp->b_flags & XBF_FS_MANAGED)) {
-                               spin_unlock(&hash->bh_lock);
-                               /*
-                                * Catch superblock reference count leaks
-                                * immediately
-                                */
-                               BUG_ON(bp->b_bn == 0);
-                               delay(100);
-                               goto again;
-                       }
-               }
-               spin_unlock(&hash->bh_lock);
-       }
- }
- /*
-  *    Allocate buffer hash table for a given target.
-  *    For devices containing metadata (i.e. not the log/realtime devices)
-  *    we need to allocate a much larger hash table.
-  */
- STATIC void
- xfs_alloc_bufhash(
-       xfs_buftarg_t           *btp,
-       int                     external)
+       struct xfs_buftarg      *btp)
  {
-       unsigned int            i;
+       struct xfs_perag        *pag;
+       uint                    i;
  
-       btp->bt_hashshift = external ? 3 : 12;  /* 8 or 4096 buckets */
-       btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
-                                        sizeof(xfs_bufhash_t));
-       for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-               spin_lock_init(&btp->bt_hash[i].bh_lock);
-               INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+       for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
+               pag = xfs_perag_get(btp->bt_mount, i);
+               spin_lock(&pag->pag_buf_lock);
+               while (rb_first(&pag->pag_buf_tree)) {
+                       spin_unlock(&pag->pag_buf_lock);
+                       delay(100);
+                       spin_lock(&pag->pag_buf_lock);
+               }
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
        }
  }
  
- STATIC void
- xfs_free_bufhash(
-       xfs_buftarg_t           *btp)
- {
-       kmem_free_large(btp->bt_hash);
-       btp->bt_hash = NULL;
- }
  /*
   *    buftarg list for delwrite queue processing
   */
@@@ -1475,7 -1508,6 +1496,6 @@@ xfs_free_buftarg
        xfs_flush_buftarg(btp, 1);
        if (mp->m_flags & XFS_MOUNT_BARRIER)
                xfs_blkdev_issue_flush(btp);
-       xfs_free_bufhash(btp);
        iput(btp->bt_mapping->host);
  
        /* Unregister the buftarg first so that we don't get a
@@@ -1597,6 -1629,7 +1617,7 @@@ out_error
  
  xfs_buftarg_t *
  xfs_alloc_buftarg(
+       struct xfs_mount        *mp,
        struct block_device     *bdev,
        int                     external,
        const char              *fsname)
  
        btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
  
+       btp->bt_mount = mp;
        btp->bt_dev =  bdev->bd_dev;
        btp->bt_bdev = bdev;
        if (xfs_setsize_buftarg_early(btp, bdev))
                goto error;
        if (xfs_alloc_delwrite_queue(btp, fsname))
                goto error;
-       xfs_alloc_bufhash(btp, external);
        return btp;
  
  error:
@@@ -1904,7 -1937,7 +1925,7 @@@ xfs_flush_buftarg
                        bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
  
                        list_del_init(&bp->b_list);
-                       xfs_iowait(bp);
+                       xfs_buf_iowait(bp);
                        xfs_buf_relse(bp);
                }
        }
@@@ -1921,7 -1954,7 +1942,7 @@@ xfs_buf_init(void)
                goto out;
  
        xfslogd_workqueue = alloc_workqueue("xfslogd",
 -                                      WQ_RESCUER | WQ_HIGHPRI, 1);
 +                                      WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
        if (!xfslogd_workqueue)
                goto out_free_buf_zone;
  
diff --combined fs/xfs/linux-2.6/xfs_buf.h
@@@ -51,7 -51,6 +51,6 @@@ typedef enum {
  #define XBF_DONE      (1 << 5) /* all pages in the buffer uptodate */
  #define XBF_DELWRI    (1 << 6) /* buffer has dirty pages */
  #define XBF_STALE     (1 << 7) /* buffer has been staled, do not find it */
- #define XBF_FS_MANAGED        (1 << 8) /* filesystem controls freeing memory */
  #define XBF_ORDERED   (1 << 11)/* use ordered writes */
  #define XBF_READ_AHEAD        (1 << 12)/* asynchronous read-ahead */
  #define XBF_LOG_BUFFER        (1 << 13)/* this is a buffer used for the log */
   */
  #define _XBF_PAGE_LOCKED      (1 << 22)
  
 -/*
 - * If we try a barrier write, but it fails we have to communicate
 - * this to the upper layers.  Unfortunately b_error gets overwritten
 - * when the buffer is re-issued so we have to add another flag to
 - * keep this information.
 - */
 -#define _XFS_BARRIER_FAILED   (1 << 23)
 -
  typedef unsigned int xfs_buf_flags_t;
  
  #define XFS_BUF_FLAGS \
        { XBF_DONE,             "DONE" }, \
        { XBF_DELWRI,           "DELWRI" }, \
        { XBF_STALE,            "STALE" }, \
-       { XBF_FS_MANAGED,       "FS_MANAGED" }, \
        { XBF_ORDERED,          "ORDERED" }, \
        { XBF_READ_AHEAD,       "READ_AHEAD" }, \
        { XBF_LOCK,             "LOCK" },       /* should never be set */\
        { _XBF_PAGES,           "PAGES" }, \
        { _XBF_RUN_QUEUES,      "RUN_QUEUES" }, \
        { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
 -      { _XBF_PAGE_LOCKED,     "PAGE_LOCKED" }, \
 -      { _XFS_BARRIER_FAILED,  "BARRIER_FAILED" }
 +      { _XBF_PAGE_LOCKED,     "PAGE_LOCKED" }
  
  
  typedef enum {
@@@ -123,14 -130,11 +121,11 @@@ typedef struct xfs_buftarg {
        dev_t                   bt_dev;
        struct block_device     *bt_bdev;
        struct address_space    *bt_mapping;
+       struct xfs_mount        *bt_mount;
        unsigned int            bt_bsize;
        unsigned int            bt_sshift;
        size_t                  bt_smask;
  
-       /* per device buffer hash table */
-       uint                    bt_hashshift;
-       xfs_bufhash_t           *bt_hash;
        /* per device delwri queue */
        struct task_struct      *bt_task;
        struct list_head        bt_list;
@@@ -158,34 -162,41 +153,41 @@@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
  #define XB_PAGES      2
  
  typedef struct xfs_buf {
+       /*
+        * first cacheline holds all the fields needed for an uncontended cache
+        * hit to be fully processed. The semaphore straddles the cacheline
+        * boundary, but the counter and lock sits on the first cacheline,
+        * which is the only bit that is touched if we hit the semaphore
+        * fast-path on locking.
+        */
+       struct rb_node          b_rbnode;       /* rbtree node */
+       xfs_off_t               b_file_offset;  /* offset in file */
+       size_t                  b_buffer_length;/* size of buffer in bytes */
+       atomic_t                b_hold;         /* reference count */
+       xfs_buf_flags_t         b_flags;        /* status flags */
        struct semaphore        b_sema;         /* semaphore for lockables */
-       unsigned long           b_queuetime;    /* time buffer was queued */
-       atomic_t                b_pin_count;    /* pin count */
        wait_queue_head_t       b_waiters;      /* unpin waiters */
        struct list_head        b_list;
-       xfs_buf_flags_t         b_flags;        /* status flags */
-       struct list_head        b_hash_list;    /* hash table list */
-       xfs_bufhash_t           *b_hash;        /* hash table list start */
+       struct xfs_perag        *b_pag;         /* contains rbtree root */
        xfs_buftarg_t           *b_target;      /* buffer target (device) */
-       atomic_t                b_hold;         /* reference count */
        xfs_daddr_t             b_bn;           /* block number for I/O */
-       xfs_off_t               b_file_offset;  /* offset in file */
-       size_t                  b_buffer_length;/* size of buffer in bytes */
        size_t                  b_count_desired;/* desired transfer size */
        void                    *b_addr;        /* virtual address of buffer */
        struct work_struct      b_iodone_work;
-       atomic_t                b_io_remaining; /* #outstanding I/O requests */
        xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
        xfs_buf_relse_t         b_relse;        /* releasing function */
        struct completion       b_iowait;       /* queue for I/O waiters */
        void                    *b_fspriv;
        void                    *b_fspriv2;
-       struct xfs_mount        *b_mount;
-       unsigned short          b_error;        /* error code on I/O */
-       unsigned int            b_page_count;   /* size of page array */
-       unsigned int            b_offset;       /* page offset in first page */
        struct page             **b_pages;      /* array of page pointers */
        struct page             *b_page_array[XB_PAGES]; /* inline pages */
+       unsigned long           b_queuetime;    /* time buffer was queued */
+       atomic_t                b_pin_count;    /* pin count */
+       atomic_t                b_io_remaining; /* #outstanding I/O requests */
+       unsigned int            b_page_count;   /* size of page array */
+       unsigned int            b_offset;       /* page offset in first page */
+       unsigned short          b_error;        /* error code on I/O */
  #ifdef XFS_BUF_LOCK_TRACKING
        int                     b_last_holder;
  #endif
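
One hedged way to pin down the layout promise in the comment above is a
build-time assertion that the hot lookup fields end inside the first
cacheline. This is a sketch, not part of the patch: it assumes a
64-byte line rather than using L1_CACHE_BYTES, and would sit after the
struct definition in xfs_buf.h:

	#include <linux/kernel.h>

	static inline void xfs_buf_layout_check(void)
	{
		/* b_flags is the last field that must sit in cacheline 0 */
		BUILD_BUG_ON(offsetof(struct xfs_buf, b_flags) +
			     sizeof(xfs_buf_flags_t) > 64);
	}
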
@@@ -204,11 -215,13 +206,13 @@@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
                                xfs_buf_flags_t);
  
  extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
- extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
+ extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
  extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
  extern void xfs_buf_hold(xfs_buf_t *);
- extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
+ extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+ struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+                               struct xfs_buftarg *target,
+                               xfs_daddr_t daddr, size_t length, int flags);
  
  /* Releasing Buffers */
  extern void xfs_buf_free(xfs_buf_t *);
@@@ -233,6 -246,8 +237,8 @@@ extern int xfs_buf_iorequest(xfs_buf_t *);
  extern int xfs_buf_iowait(xfs_buf_t *);
  extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
                                xfs_buf_rw_t);
+ #define xfs_buf_zero(bp, off, len) \
+           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
  
  static inline int xfs_buf_geterror(xfs_buf_t *bp)
  {
@@@ -267,8 -282,6 +273,6 @@@ extern void xfs_buf_terminate(void)
                                        XFS_BUF_DONE(bp);       \
                                } while (0)
  
- #define XFS_BUF_UNMANAGE(bp)  ((bp)->b_flags &= ~XBF_FS_MANAGED)
  #define XFS_BUF_DELAYWRITE(bp)                ((bp)->b_flags |= XBF_DELWRI)
  #define XFS_BUF_UNDELAYWRITE(bp)      xfs_buf_delwri_dequeue(bp)
  #define XFS_BUF_ISDELAYWRITE(bp)      ((bp)->b_flags & XBF_DELWRI)
@@@ -347,25 -360,11 +351,11 @@@ static inline void xfs_buf_relse(xfs_bu
        xfs_buf_rele(bp);
  }
  
- #define xfs_biodone(bp)               xfs_buf_ioend(bp, 0)
- #define xfs_biomove(bp, off, len, data, rw) \
-           xfs_buf_iomove((bp), (off), (len), (data), \
-               ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
- #define xfs_biozero(bp, off, len) \
-           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
- #define xfs_iowait(bp)        xfs_buf_iowait(bp)
- #define xfs_baread(target, rablkno, ralen)  \
-       xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
  /*
   *    Handling of buftargs.
   */
- extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
+ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+                       struct block_device *, int, const char *);
  extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
  extern void xfs_wait_buftarg(xfs_buftarg_t *);
  extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --combined fs/xfs/linux-2.6/xfs_super.c
@@@ -44,7 -44,6 +44,6 @@@
  #include "xfs_buf_item.h"
  #include "xfs_utils.h"
  #include "xfs_vnodeops.h"
- #include "xfs_version.h"
  #include "xfs_log_priv.h"
  #include "xfs_trans_priv.h"
  #include "xfs_filestream.h"
@@@ -645,7 -644,7 +644,7 @@@ xfs_barrier_test
        XFS_BUF_ORDERED(sbp);
  
        xfsbdstrat(mp, sbp);
-       error = xfs_iowait(sbp);
+       error = xfs_buf_iowait(sbp);
  
        /*
         * Clear all the flags we set and possible error state in the
@@@ -693,7 -692,8 +692,7 @@@ void
  xfs_blkdev_issue_flush(
        xfs_buftarg_t           *buftarg)
  {
 -      blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
 -                      BLKDEV_IFL_WAIT);
 +      blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
  }
  
  STATIC void
@@@ -757,18 -757,20 +756,20 @@@ xfs_open_devices
         * Setup xfs_mount buffer target pointers
         */
        error = ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
+       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
        if (!mp->m_ddev_targp)
                goto out_close_rtdev;
  
        if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
+               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+                                                       mp->m_fsname);
                if (!mp->m_rtdev_targp)
                        goto out_free_ddev_targ;
        }
  
        if (logdev && logdev != ddev) {
-               mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
+               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+                                                       mp->m_fsname);
                if (!mp->m_logdev_targp)
                        goto out_free_rtdev_targ;
        } else {
@@@ -971,12 -973,7 +972,7 @@@ xfs_fs_inode_init_once
  
  /*
   * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
-  * we catch unlogged VFS level updates to the inode. Care must be taken
-  * here - the transaction code calls mark_inode_dirty_sync() to mark the
-  * VFS inode dirty in a transaction and clears the i_update_core field;
-  * it must clear the field after calling mark_inode_dirty_sync() to
-  * correctly indicate that the dirty state has been propagated into the
-  * inode log item.
+  * we catch unlogged VFS level updates to the inode.
   *
   * We need the barrier() to maintain correct ordering between unlogged
   * updates and the transaction commit code that clears the i_update_core
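
The surviving comment still hinges on the barrier() in
xfs_fs_dirty_inode(); in outline (ordering only, per the comment rather
than the full function), the compiler barrier stops the i_update_core
store from being hoisted above the unlogged field updates that
triggered the dirtying:

	barrier();			/* unlogged updates stay before... */
	XFS_I(inode)->i_update_core = 1;	/* ...the dirty flag */
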
@@@ -1520,8 -1517,9 +1516,9 @@@ xfs_fs_fill_super
        if (error)
                goto out_free_fsname;
  
-       if (xfs_icsb_init_counters(mp))
-               mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+       error = xfs_icsb_init_counters(mp);
+       if (error)
+               goto out_close_devices;
  
        error = xfs_readsb(mp, flags);
        if (error)
        xfs_freesb(mp);
   out_destroy_counters:
        xfs_icsb_destroy_counters(mp);
+  out_close_devices:
        xfs_close_devices(mp);
   out_free_fsname:
        xfs_free_fsname(mp);
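
Turning the per-cpu counter setup from a soft fallback into a hard
failure only works because each out_* label unwinds exactly the steps
that succeeded before the jump, in reverse order. The skeleton of that
idiom, with illustrative setup/teardown names:

	static int
	setup_all(void)
	{
		int	error;

		error = setup_a();
		if (error)
			goto out;
		error = setup_b();	/* e.g. xfs_icsb_init_counters */
		if (error)
			goto out_undo_a;
		error = setup_c();	/* e.g. xfs_readsb */
		if (error)
			goto out_undo_b;
		return 0;

	 out_undo_b:
		teardown_b();
	 out_undo_a:
		teardown_a();
	 out:
		return error;
	}
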
diff --combined fs/xfs/linux-2.6/xfs_trace.h
@@@ -124,7 -124,7 +124,7 @@@ DEFINE_EVENT(xfs_perag_class, name,	\
                 unsigned long caller_ip),                                      \
        TP_ARGS(mp, agno, refcount, caller_ip))
  DEFINE_PERAG_REF_EVENT(xfs_perag_get);
- DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+ DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
  DEFINE_PERAG_REF_EVENT(xfs_perag_put);
  DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
  DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
@@@ -325,12 -325,13 +325,12 @@@ DEFINE_BUF_EVENT(xfs_buf_lock)
  DEFINE_BUF_EVENT(xfs_buf_lock_done);
  DEFINE_BUF_EVENT(xfs_buf_cond_lock);
  DEFINE_BUF_EVENT(xfs_buf_unlock);
 -DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
  DEFINE_BUF_EVENT(xfs_buf_iowait);
  DEFINE_BUF_EVENT(xfs_buf_iowait_done);
  DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
  DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
  DEFINE_BUF_EVENT(xfs_buf_delwri_split);
- DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
+ DEFINE_BUF_EVENT(xfs_buf_get_uncached);
  DEFINE_BUF_EVENT(xfs_bdstrat_shut);
  DEFINE_BUF_EVENT(xfs_buf_item_relse);
  DEFINE_BUF_EVENT(xfs_buf_item_iodone);
diff --combined fs/xfs/xfs_log.c
@@@ -916,6 -916,19 +916,6 @@@ xlog_iodone(xfs_buf_t *bp)
        aborted = 0;
        l = iclog->ic_log;
  
 -      /*
 -       * If the _XFS_BARRIER_FAILED flag was set by a lower
 -       * layer, it means the underlying device no longer supports
 -       * barrier I/O. Warn loudly and turn off barriers.
 -       */
 -      if (bp->b_flags & _XFS_BARRIER_FAILED) {
 -              bp->b_flags &= ~_XFS_BARRIER_FAILED;
 -              l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
 -              xfs_fs_cmn_err(CE_WARN, l->l_mp,
 -                              "xlog_iodone: Barriers are no longer supported"
 -                              " by device. Disabling barriers\n");
 -      }
 -
        /*
         * Race to shutdown the filesystem if we see an error.
         */
@@@ -1118,7 -1131,8 +1118,8 @@@ xlog_alloc_log(xfs_mount_t      *mp,
                iclog->ic_prev = prev_iclog;
                prev_iclog = iclog;
  
-               bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+               bp = xfs_buf_get_uncached(mp->m_logdev_targp,
+                                               log->l_iclog_size, 0);
                if (!bp)
                        goto out_free_iclog;
                if (!XFS_BUF_CPSEMA(bp))
@@@ -1296,7 -1310,7 +1297,7 @@@ xlog_bdstrat
        if (iclog->ic_state & XLOG_STATE_IOERROR) {
                XFS_BUF_ERROR(bp, EIO);
                XFS_BUF_STALE(bp);
-               xfs_biodone(bp);
+               xfs_buf_ioend(bp, 0);
                /*
                 * It would seem logical to return EIO here, but we rely on
                 * the log state machine to propagate I/O errors instead of