Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[cascardo/linux.git] / fs / super.c
index 24a76bc..954aeb8 100644 (file)
@@ -135,6 +135,24 @@ static unsigned long super_cache_count(struct shrinker *shrink,
        return total_objects;
 }
 
+static void destroy_super_work(struct work_struct *work)
+{
+       struct super_block *s = container_of(work, struct super_block,
+                                                       destroy_work);
+       int i;
+
+       for (i = 0; i < SB_FREEZE_LEVELS; i++)
+               percpu_free_rwsem(&s->s_writers.rw_sem[i]);
+       kfree(s);
+}
+
+static void destroy_super_rcu(struct rcu_head *head)
+{
+       struct super_block *s = container_of(head, struct super_block, rcu);
+       INIT_WORK(&s->destroy_work, destroy_super_work);
+       schedule_work(&s->destroy_work);
+}
+
 /**
  *     destroy_super   -       frees a superblock
  *     @s: superblock to free
@@ -143,16 +161,13 @@ static unsigned long super_cache_count(struct shrinker *shrink,
  */
 static void destroy_super(struct super_block *s)
 {
-       int i;
        list_lru_destroy(&s->s_dentry_lru);
        list_lru_destroy(&s->s_inode_lru);
-       for (i = 0; i < SB_FREEZE_LEVELS; i++)
-               percpu_counter_destroy(&s->s_writers.counter[i]);
        security_sb_free(s);
        WARN_ON(!list_empty(&s->s_mounts));
        kfree(s->s_subtype);
        kfree(s->s_options);
-       kfree_rcu(s, rcu);
+       call_rcu(&s->rcu, destroy_super_rcu);
 }
 
 /**
@@ -178,19 +193,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
                goto fail;
 
        for (i = 0; i < SB_FREEZE_LEVELS; i++) {
-               if (percpu_counter_init(&s->s_writers.counter[i], 0,
-                                       GFP_KERNEL) < 0)
+               if (__percpu_init_rwsem(&s->s_writers.rw_sem[i],
+                                       sb_writers_name[i],
+                                       &type->s_writers_key[i]))
                        goto fail;
-               lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
-                                &type->s_writers_key[i], 0);
        }
-       init_waitqueue_head(&s->s_writers.wait);
        init_waitqueue_head(&s->s_writers.wait_unfrozen);
        s->s_bdi = &noop_backing_dev_info;
        s->s_flags = flags;
        INIT_HLIST_NODE(&s->s_instances);
        INIT_HLIST_BL_HEAD(&s->s_anon);
+       mutex_init(&s->s_sync_lock);
        INIT_LIST_HEAD(&s->s_inodes);
+       spin_lock_init(&s->s_inode_list_lock);
 
        if (list_lru_init_memcg(&s->s_dentry_lru))
                goto fail;
@@ -399,7 +414,7 @@ void generic_shutdown_super(struct super_block *sb)
                sync_filesystem(sb);
                sb->s_flags &= ~MS_ACTIVE;
 
-               fsnotify_unmount_inodes(&sb->s_inodes);
+               fsnotify_unmount_inodes(sb);
 
                evict_inodes(sb);
 
@@ -1146,47 +1161,10 @@ out:
  */
 void __sb_end_write(struct super_block *sb, int level)
 {
-       percpu_counter_dec(&sb->s_writers.counter[level-1]);
-       /*
-        * Make sure s_writers are updated before we wake up waiters in
-        * freeze_super().
-        */
-       smp_mb();
-       if (waitqueue_active(&sb->s_writers.wait))
-               wake_up(&sb->s_writers.wait);
-       rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+       percpu_up_read(sb->s_writers.rw_sem + level-1);
 }
 EXPORT_SYMBOL(__sb_end_write);
 
-static int do_sb_start_write(struct super_block *sb, int level, bool wait,
-                               unsigned long ip)
-{
-       if (wait)
-               rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 0, ip);
-retry:
-       if (unlikely(sb->s_writers.frozen >= level)) {
-               if (!wait)
-                       return 0;
-               wait_event(sb->s_writers.wait_unfrozen,
-                          sb->s_writers.frozen < level);
-       }
-
-       percpu_counter_inc(&sb->s_writers.counter[level-1]);
-       /*
-        * Make sure counter is updated before we check for frozen.
-        * freeze_super() first sets frozen and then checks the counter.
-        */
-       smp_mb();
-       if (unlikely(sb->s_writers.frozen >= level)) {
-               __sb_end_write(sb, level);
-               goto retry;
-       }
-
-       if (!wait)
-               rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, 1, ip);
-       return 1;
-}
-
 /*
  * This is an internal function, please use sb_start_{write,pagefault,intwrite}
  * instead.
@@ -1194,7 +1172,7 @@ retry:
 int __sb_start_write(struct super_block *sb, int level, bool wait)
 {
        bool force_trylock = false;
-       int ret;
+       int ret = 1;
 
 #ifdef CONFIG_LOCKDEP
        /*
@@ -1210,13 +1188,17 @@ int __sb_start_write(struct super_block *sb, int level, bool wait)
                int i;
 
                for (i = 0; i < level - 1; i++)
-                       if (lock_is_held(&sb->s_writers.lock_map[i])) {
+                       if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
                                force_trylock = true;
                                break;
                        }
        }
 #endif
-       ret = do_sb_start_write(sb, level, wait && !force_trylock, _RET_IP_);
+       if (wait && !force_trylock)
+               percpu_down_read(sb->s_writers.rw_sem + level-1);
+       else
+               ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
+
        WARN_ON(force_trylock & !ret);
        return ret;
 }
@@ -1228,37 +1210,33 @@ EXPORT_SYMBOL(__sb_start_write);
  * @level: type of writers we wait for (normal vs page fault)
  *
  * This function waits until there are no writers of given type to given file
- * system. Caller of this function should make sure there can be no new writers
- * of type @level before calling this function. Otherwise this function can
- * livelock.
+ * system.
  */
 static void sb_wait_write(struct super_block *sb, int level)
 {
-       s64 writers;
-
+       percpu_down_write(sb->s_writers.rw_sem + level-1);
        /*
-        * We just cycle-through lockdep here so that it does not complain
-        * about returning with lock to userspace
+        * We are going to return to userspace and forget about this lock; the
+        * ownership goes to the caller of thaw_super(), which unlocks it.
+        *
+        * FIXME: we should do this release just before returning from
+        * freeze_super(), after sync_filesystem(sb) and s_op->freeze_fs(sb)
+        * have been called, and thaw_super() should re-acquire these locks
+        * before s_op->unfreeze_fs(sb). However, this leads to lockdep false
+        * positives, so currently we release early, right after the acquire.
         */
-       rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
-       rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
-
-       do {
-               DEFINE_WAIT(wait);
+       percpu_rwsem_release(sb->s_writers.rw_sem + level-1, 0, _THIS_IP_);
+}
 
-               /*
-                * We use a barrier in prepare_to_wait() to separate setting
-                * of frozen and checking of the counter
-                */
-               prepare_to_wait(&sb->s_writers.wait, &wait,
-                               TASK_UNINTERRUPTIBLE);
+static void sb_freeze_unlock(struct super_block *sb)
+{
+       int level;
 
-               writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
-               if (writers)
-                       schedule();
+       for (level = 0; level < SB_FREEZE_LEVELS; ++level)
+               percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
 
-               finish_wait(&sb->s_writers.wait, &wait);
-       } while (writers);
+       for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
+               percpu_up_write(sb->s_writers.rw_sem + level);
 }
 
 /**
@@ -1317,20 +1295,14 @@ int freeze_super(struct super_block *sb)
                return 0;
        }
 
-       /* From now on, no new normal writers can start */
        sb->s_writers.frozen = SB_FREEZE_WRITE;
-       smp_wmb();
-
        /* Release s_umount to preserve sb_start_write -> s_umount ordering */
        up_write(&sb->s_umount);
-
        sb_wait_write(sb, SB_FREEZE_WRITE);
+       down_write(&sb->s_umount);
 
        /* Now we go and block page faults... */
-       down_write(&sb->s_umount);
        sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
-       smp_wmb();
-
        sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
 
        /* All writers are done so after syncing there won't be dirty data */
@@ -1338,7 +1310,6 @@ int freeze_super(struct super_block *sb)
 
        /* Now wait for internal filesystem counter */
        sb->s_writers.frozen = SB_FREEZE_FS;
-       smp_wmb();
        sb_wait_write(sb, SB_FREEZE_FS);
 
        if (sb->s_op->freeze_fs) {
@@ -1347,7 +1318,7 @@ int freeze_super(struct super_block *sb)
                        printk(KERN_ERR
                                "VFS:Filesystem freeze failed\n");
                        sb->s_writers.frozen = SB_UNFROZEN;
-                       smp_wmb();
+                       sb_freeze_unlock(sb);
                        wake_up(&sb->s_writers.wait_unfrozen);
                        deactivate_locked_super(sb);
                        return ret;
@@ -1379,8 +1350,10 @@ int thaw_super(struct super_block *sb)
                return -EINVAL;
        }
 
-       if (sb->s_flags & MS_RDONLY)
+       if (sb->s_flags & MS_RDONLY) {
+               sb->s_writers.frozen = SB_UNFROZEN;
                goto out;
+       }
 
        if (sb->s_op->unfreeze_fs) {
                error = sb->s_op->unfreeze_fs(sb);
@@ -1392,12 +1365,11 @@ int thaw_super(struct super_block *sb)
                }
        }
 
-out:
        sb->s_writers.frozen = SB_UNFROZEN;
-       smp_wmb();
+       sb_freeze_unlock(sb);
+out:
        wake_up(&sb->s_writers.wait_unfrozen);
        deactivate_locked_super(sb);
-
        return 0;
 }
 EXPORT_SYMBOL(thaw_super);