Merge branch 'stable/cleancache.v13' into linux-next
author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Mon, 19 Mar 2012 16:12:19 +0000 (12:12 -0400)
committer Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Mon, 19 Mar 2012 16:12:19 +0000 (12:12 -0400)
* stable/cleancache.v13:
  mm: cleancache: Use __read_mostly as appropriate.
  mm: cleancache: report statistics via debugfs instead of sysfs.
  mm: zcache/tmem/cleancache: s/flush/invalidate/
  mm: cleancache: s/flush/invalidate/

drivers/staging/zcache/zcache-main.c
fs/block_dev.c
fs/super.c
mm/filemap.c
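
The two s/flush/invalidate/ commits in this merge rename both the backend callbacks and the VFS-side helpers, as the zcache, fs/ and mm/ hunks below show. As a minimal sketch of a backend ops table after the rename — assuming the callback signatures of <linux/cleancache.h> from this kernel generation; the my_* names are placeholders and not part of this merge:

    #include <linux/cleancache.h>

    /* Placeholder callbacks; only the .invalidate_* member names come from
     * the diff below, everything else here is illustrative. */
    static void my_invalidate_page(int pool, struct cleancache_filekey key,
                                   pgoff_t index)
    {
            /* drop one cached page from the backend */
    }

    static void my_invalidate_inode(int pool, struct cleancache_filekey key)
    {
            /* drop every cached page belonging to the file named by key */
    }

    static void my_invalidate_fs(int pool)
    {
            /* tear down the whole per-filesystem pool */
    }

    static struct cleancache_ops my_cleancache_ops = {
            .invalidate_page  = my_invalidate_page,
            .invalidate_inode = my_invalidate_inode,
            .invalidate_fs    = my_invalidate_fs,
            /* .put_page, .get_page, .init_fs and .init_shared_fs keep their
             * old names and are omitted here. */
    };

The callers are renamed to match: cleancache_flush_page/inode/fs() become cleancache_invalidate_page/inode/fs() in mm/filemap.c, fs/block_dev.c and fs/super.c below.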

diff --combined drivers/staging/zcache/zcache-main.c
@@@ -358,8 -358,8 +358,8 @@@ static struct zbud_hdr *zbud_create(uin
        if (unlikely(zbpg == NULL))
                goto out;
        /* ok, have a page, now compress the data before taking locks */
 -      spin_lock(&zbpg->lock);
        spin_lock(&zbud_budlists_spinlock);
 +      spin_lock(&zbpg->lock);
        list_add_tail(&zbpg->bud_list, &zbud_unbuddied[nchunks].list);
        zbud_unbuddied[nchunks].count++;
        zh = &zbpg->buddy[0];
@@@ -389,11 -389,12 +389,11 @@@ init_zh
        zh->oid = *oid;
        zh->pool_id = pool_id;
        zh->client_id = client_id;
 -      /* can wait to copy the data until the list locks are dropped */
 -      spin_unlock(&zbud_budlists_spinlock);
 -
        to = zbud_data(zh, size);
        memcpy(to, cdata, size);
        spin_unlock(&zbpg->lock);
 +      spin_unlock(&zbud_budlists_spinlock);
 +
        zbud_cumul_chunk_counts[nchunks]++;
        atomic_inc(&zcache_zbud_curr_zpages);
        zcache_zbud_cumul_zpages++;
@@@ -654,8 -655,8 +654,8 @@@ static unsigned int zv_max_zsize = (PAG
   */
  static unsigned int zv_max_mean_zsize = (PAGE_SIZE / 8) * 5;
  
 -static unsigned long zv_curr_dist_counts[NCHUNKS];
 -static unsigned long zv_cumul_dist_counts[NCHUNKS];
 +static atomic_t zv_curr_dist_counts[NCHUNKS];
 +static atomic_t zv_cumul_dist_counts[NCHUNKS];
  
  static struct zv_hdr *zv_create(struct xv_pool *xvpool, uint32_t pool_id,
                                struct tmem_oid *oid, uint32_t index,
                        &page, &offset, ZCACHE_GFP_MASK);
        if (unlikely(ret))
                goto out;
 -      zv_curr_dist_counts[chunks]++;
 -      zv_cumul_dist_counts[chunks]++;
 +      atomic_inc(&zv_curr_dist_counts[chunks]);
 +      atomic_inc(&zv_cumul_dist_counts[chunks]);
        zv = kmap_atomic(page, KM_USER0) + offset;
        zv->index = index;
        zv->oid = *oid;
@@@ -697,7 -698,7 +697,7 @@@ static void zv_free(struct xv_pool *xvp
  
        ASSERT_SENTINEL(zv, ZVH);
        BUG_ON(chunks >= NCHUNKS);
 -      zv_curr_dist_counts[chunks]--;
 +      atomic_dec(&zv_curr_dist_counts[chunks]);
        size -= sizeof(*zv);
        BUG_ON(size == 0);
        INVERT_SENTINEL(zv, ZVH);
@@@ -737,7 -738,7 +737,7 @@@ static int zv_curr_dist_counts_show(cha
        char *p = buf;
  
        for (i = 0; i < NCHUNKS; i++) {
 -              n = zv_curr_dist_counts[i];
 +              n = atomic_read(&zv_curr_dist_counts[i]);
                p += sprintf(p, "%lu ", n);
                chunks += n;
                sum_total_chunks += i * n;
@@@ -753,7 -754,7 +753,7 @@@ static int zv_cumul_dist_counts_show(ch
        char *p = buf;
  
        for (i = 0; i < NCHUNKS; i++) {
 -              n = zv_cumul_dist_counts[i];
 +              n = atomic_read(&zv_cumul_dist_counts[i]);
                p += sprintf(p, "%lu ", n);
                chunks += n;
                sum_total_chunks += i * n;
@@@ -786,7 -787,7 +786,7 @@@ static ssize_t zv_max_zsize_store(struc
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
  
 -      err = strict_strtoul(buf, 10, &val);
 +      err = kstrtoul(buf, 10, &val);
        if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
                return -EINVAL;
        zv_max_zsize = val;
@@@ -818,7 -819,7 +818,7 @@@ static ssize_t zv_max_mean_zsize_store(
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
  
 -      err = strict_strtoul(buf, 10, &val);
 +      err = kstrtoul(buf, 10, &val);
        if (err || (val == 0) || (val > (PAGE_SIZE / 8) * 7))
                return -EINVAL;
        zv_max_mean_zsize = val;
@@@ -852,7 -853,7 +852,7 @@@ static ssize_t zv_page_count_policy_per
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
  
 -      err = strict_strtoul(buf, 10, &val);
 +      err = kstrtoul(buf, 10, &val);
        if (err || (val == 0) || (val > 150))
                return -EINVAL;
        zv_page_count_policy_percent = val;
@@@ -1757,9 -1758,9 +1757,9 @@@ static int zcache_cleancache_init_share
  static struct cleancache_ops zcache_cleancache_ops = {
        .put_page = zcache_cleancache_put_page,
        .get_page = zcache_cleancache_get_page,
-       .flush_page = zcache_cleancache_flush_page,
-       .flush_inode = zcache_cleancache_flush_inode,
-       .flush_fs = zcache_cleancache_flush_fs,
+       .invalidate_page = zcache_cleancache_flush_page,
+       .invalidate_inode = zcache_cleancache_flush_inode,
+       .invalidate_fs = zcache_cleancache_flush_fs,
        .init_shared_fs = zcache_cleancache_init_shared_fs,
        .init_fs = zcache_cleancache_init_fs
  };
@@@ -1781,9 -1782,9 +1781,9 @@@ static int zcache_frontswap_poolid = -1
   * Swizzling increases objects per swaptype, increasing tmem concurrency
   * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
   * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
 - * frontswap_get_page()
 + * frontswap_get_page(), but has side-effects. Hence using 8.
   */
 -#define SWIZ_BITS             27
 +#define SWIZ_BITS             8
  #define SWIZ_MASK             ((1 << SWIZ_BITS) - 1)
  #define _oswiz(_type, _ind)   ((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
  #define iswiz(_ind)           (_ind >> SWIZ_BITS)
@@@ -1867,8 -1868,8 +1867,8 @@@ static void zcache_frontswap_init(unsig
  static struct frontswap_ops zcache_frontswap_ops = {
        .put_page = zcache_frontswap_put_page,
        .get_page = zcache_frontswap_get_page,
-       .flush_page = zcache_frontswap_flush_page,
-       .flush_area = zcache_frontswap_flush_area,
+       .invalidate_page = zcache_frontswap_flush_page,
+       .invalidate_area = zcache_frontswap_flush_area,
        .init = zcache_frontswap_init
  };
  
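The SWIZ_BITS change in the frontswap hunk above alters how a swap entry is split between a tmem object id and an index within that object. A worked example with the new value (the numbers are purely illustrative):

    /* With SWIZ_BITS = 8, SWIZ_MASK = 0xff.  Take type = 1, ind = 0x1234:
     *
     *   _oswiz(1, 0x1234) = (1 << 8) | (0x1234 & 0xff) = 0x134   object id
     *   iswiz(0x1234)     =  0x1234 >> 8               = 0x12    index in object
     *
     * The low 8 bits of the swap offset select one of up to 256 objects per
     * swap type, so neighbouring offsets land in different objects and can be
     * operated on concurrently; with the old SWIZ_BITS = 27 the object id
     * carried almost the whole offset, effectively reconstructing the swap
     * entry, as the comment above notes.
     */
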
diff --combined fs/block_dev.c
@@@ -25,6 -25,7 +25,6 @@@
  #include <linux/uio.h>
  #include <linux/namei.h>
  #include <linux/log2.h>
 -#include <linux/kmemleak.h>
  #include <linux/cleancache.h>
  #include <asm/uaccess.h>
  #include "internal.h"
@@@ -109,7 -110,7 +109,7 @@@ void invalidate_bdev(struct block_devic
        /* 99% of the time, we don't need to flush the cleancache on the bdev.
         * But, for the strange corners, lets be cautious
         */
-       cleancache_flush_inode(mapping);
+       cleancache_invalidate_inode(mapping);
  }
  EXPORT_SYMBOL(invalidate_bdev);
  
@@@ -520,7 -521,7 +520,7 @@@ static struct super_block *blockdev_sup
  void __init bdev_cache_init(void)
  {
        int err;
 -      struct vfsmount *bd_mnt;
 +      static struct vfsmount *bd_mnt;
  
        bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
                        0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
        bd_mnt = kern_mount(&bd_type);
        if (IS_ERR(bd_mnt))
                panic("Cannot create bdev pseudo-fs");
 -      /*
 -       * This vfsmount structure is only used to obtain the
 -       * blockdev_superblock, so tell kmemleak not to report it.
 -       */
 -      kmemleak_not_leak(bd_mnt);
 -      blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
 +      blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
  }
  
  /*
@@@ -1139,7 -1145,6 +1139,7 @@@ static int __blkdev_get(struct block_de
        mutex_lock_nested(&bdev->bd_mutex, for_part);
        if (!bdev->bd_openers) {
                bdev->bd_disk = disk;
 +              bdev->bd_queue = disk->queue;
                bdev->bd_contains = bdev;
                if (!partno) {
                        struct backing_dev_info *bdi;
                                        disk_put_part(bdev->bd_part);
                                        bdev->bd_part = NULL;
                                        bdev->bd_disk = NULL;
 +                                      bdev->bd_queue = NULL;
                                        mutex_unlock(&bdev->bd_mutex);
                                        disk_unblock_events(disk);
                                        put_disk(disk);
                         * The latter is necessary to prevent ghost
                         * partitions on a removed medium.
                         */
 -                      if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
 -                              rescan_partitions(disk, bdev);
 +                      if (bdev->bd_invalidated) {
 +                              if (!ret)
 +                                      rescan_partitions(disk, bdev);
 +                              else if (ret == -ENOMEDIUM)
 +                                      invalidate_partitions(disk, bdev);
 +                      }
                        if (ret)
                                goto out_clear;
                } else {
                        if (bdev->bd_disk->fops->open)
                                ret = bdev->bd_disk->fops->open(bdev, mode);
                        /* the same as first opener case, read comment there */
 -                      if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
 -                              rescan_partitions(bdev->bd_disk, bdev);
 +                      if (bdev->bd_invalidated) {
 +                              if (!ret)
 +                                      rescan_partitions(bdev->bd_disk, bdev);
 +                              else if (ret == -ENOMEDIUM)
 +                                      invalidate_partitions(bdev->bd_disk, bdev);
 +                      }
                        if (ret)
                                goto out_unlock_bdev;
                }
        disk_put_part(bdev->bd_part);
        bdev->bd_disk = NULL;
        bdev->bd_part = NULL;
 +      bdev->bd_queue = NULL;
        bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
        if (bdev != bdev->bd_contains)
                __blkdev_put(bdev->bd_contains, mode, 1);
diff --combined fs/super.c
@@@ -250,7 -250,7 +250,7 @@@ void deactivate_locked_super(struct sup
  {
        struct file_system_type *fs = s->s_type;
        if (atomic_dec_and_test(&s->s_active)) {
-               cleancache_flush_fs(s);
+               cleancache_invalidate_fs(s);
                fs->kill_sb(s);
  
                /* caches are now gone, we can safely kill the shrinker now */
@@@ -633,28 -633,6 +633,28 @@@ rescan
  
  EXPORT_SYMBOL(get_super);
  
 +/**
 + *    get_super_thawed - get thawed superblock of a device
 + *    @bdev: device to get the superblock for
 + *
 + *    Scans the superblock list and finds the superblock of the file system
 + *    mounted on the device. The superblock is returned once it is thawed
 + *    (or immediately if it was not frozen). %NULL is returned if no match
 + *    is found.
 + */
 +struct super_block *get_super_thawed(struct block_device *bdev)
 +{
 +      while (1) {
 +              struct super_block *s = get_super(bdev);
 +              if (!s || s->s_frozen == SB_UNFROZEN)
 +                      return s;
 +              up_read(&s->s_umount);
 +              vfs_check_frozen(s, SB_FREEZE_WRITE);
 +              put_super(s);
 +      }
 +}
 +EXPORT_SYMBOL(get_super_thawed);
 +
  /**
   * get_active_super - get an active reference to the superblock of a device
   * @bdev: device to get the superblock for
@@@ -1208,8 -1186,6 +1208,8 @@@ int freeze_super(struct super_block *sb
                        printk(KERN_ERR
                                "VFS:Filesystem freeze failed\n");
                        sb->s_frozen = SB_UNFROZEN;
 +                      smp_wmb();
 +                      wake_up(&sb->s_wait_unfrozen);
                        deactivate_locked_super(sb);
                        return ret;
                }
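get_super_thawed(), added in fs/super.c above, retries get_super() until the filesystem on the device is no longer frozen. A hypothetical caller sketch, assuming the usual get_super()/drop_super() pairing; the function and the operation it performs are illustrative, not part of this merge:

    /* Wait for a thawed superblock before doing something that would block
     * against a frozen filesystem; pairing with drop_super() is assumed,
     * as with get_super(). */
    static int example_fs_admin_op(struct block_device *bdev)
    {
            struct super_block *sb = get_super_thawed(bdev);

            if (!sb)
                    return -ENODEV;
            /* ... perform the write-side operation against sb ... */
            drop_super(sb);
            return 0;
    }
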
diff --combined mm/filemap.c
@@@ -123,7 -123,7 +123,7 @@@ void __delete_from_page_cache(struct pa
        if (PageUptodate(page) && PageMappedToDisk(page))
                cleancache_put_page(page);
        else
-               cleancache_flush_page(mapping, page);
+               cleancache_invalidate_page(mapping, page);
  
        radix_tree_delete(&mapping->page_tree, page->index);
        page->mapping = NULL;
@@@ -393,11 -393,24 +393,11 @@@ EXPORT_SYMBOL(filemap_write_and_wait_ra
  int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
  {
        int error;
 -      struct mem_cgroup *memcg = NULL;
  
        VM_BUG_ON(!PageLocked(old));
        VM_BUG_ON(!PageLocked(new));
        VM_BUG_ON(new->mapping);
  
 -      /*
 -       * This is not page migration, but prepare_migration and
 -       * end_migration does enough work for charge replacement.
 -       *
 -       * In the longer term we probably want a specialized function
 -       * for moving the charge from old to new in a more efficient
 -       * manner.
 -       */
 -      error = mem_cgroup_prepare_migration(old, new, &memcg, gfp_mask);
 -      if (error)
 -              return error;
 -
        error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
        if (!error) {
                struct address_space *mapping = old->mapping;
                if (PageSwapBacked(new))
                        __inc_zone_page_state(new, NR_SHMEM);
                spin_unlock_irq(&mapping->tree_lock);
 +              /* mem_cgroup codes must not be called under tree_lock */
 +              mem_cgroup_replace_page_cache(old, new);
                radix_tree_preload_end();
                if (freepage)
                        freepage(old);
                page_cache_release(old);
 -              mem_cgroup_end_migration(memcg, old, new, true);
 -      } else {
 -              mem_cgroup_end_migration(memcg, old, new, false);
        }
  
        return error;
@@@ -1400,12 -1414,15 +1400,12 @@@ generic_file_aio_read(struct kiocb *ioc
        unsigned long seg = 0;
        size_t count;
        loff_t *ppos = &iocb->ki_pos;
 -      struct blk_plug plug;
  
        count = 0;
        retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
        if (retval)
                return retval;
  
 -      blk_start_plug(&plug);
 -
        /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
        if (filp->f_flags & O_DIRECT) {
                loff_t size;
                        retval = filemap_write_and_wait_range(mapping, pos,
                                        pos + iov_length(iov, nr_segs) - 1);
                        if (!retval) {
 +                              struct blk_plug plug;
 +
 +                              blk_start_plug(&plug);
                                retval = mapping->a_ops->direct_IO(READ, iocb,
                                                        iov, pos, nr_segs);
 +                              blk_finish_plug(&plug);
                        }
                        if (retval > 0) {
                                *ppos = pos + retval;
                        break;
        }
  out:
 -      blk_finish_plug(&plug);
        return retval;
  }
  EXPORT_SYMBOL(generic_file_aio_read);
@@@ -2337,11 -2351,8 +2337,11 @@@ struct page *grab_cache_page_write_begi
                                        pgoff_t index, unsigned flags)
  {
        int status;
 +      gfp_t gfp_mask;
        struct page *page;
        gfp_t gfp_notmask = 0;
 +
 +      gfp_mask = mapping_gfp_mask(mapping) | __GFP_WRITE;
        if (flags & AOP_FLAG_NOFS)
                gfp_notmask = __GFP_FS;
  repeat:
        if (page)
                goto found;
  
 -      page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
 +      page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
        if (!page)
                return NULL;
        status = add_to_page_cache_lru(page, mapping, index,