blkcg: make blkg_[rw]stat_recursive_sum() able to index into blkcg_gq
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d6283b3..b263207 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -68,9 +68,11 @@ static void blkg_free(struct blkcg_gq *blkg)
                return;
 
        for (i = 0; i < BLKCG_MAX_POLS; i++)
-               kfree(blkg->pd[i]);
+               if (blkg->pd[i])
+                       blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
 
-       blk_exit_rl(&blkg->rl);
+       if (blkg->blkcg != &blkcg_root)
+               blk_exit_rl(&blkg->rl);
        kfree(blkg);
 }
 
@@ -113,7 +115,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
                        continue;
 
                /* alloc per-policy data and attach it to blkg */
-               pd = kzalloc_node(pol->pd_size, gfp_mask, q->node);
+               pd = pol->pd_alloc_fn(gfp_mask, q->node);
                if (!pd)
                        goto err_free;
 
@@ -129,26 +131,11 @@ err_free:
        return NULL;
 }
 
-/**
- * __blkg_lookup - internal version of blkg_lookup()
- * @blkcg: blkcg of interest
- * @q: request_queue of interest
- * @update_hint: whether to update lookup hint with the result or not
- *
- * This is internal version and shouldn't be used by policy
- * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
- * @q's bypass state.  If @update_hint is %true, the caller should be
- * holding @q->queue_lock and lookup hint is updated on success.
- */
-struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
-                              bool update_hint)
+struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
+                                     struct request_queue *q, bool update_hint)
 {
        struct blkcg_gq *blkg;
 
-       blkg = rcu_dereference(blkcg->blkg_hint);
-       if (blkg && blkg->q == q)
-               return blkg;
-
        /*
         * Hint didn't match.  Look up from the radix tree.  Note that the
         * hint can only be updated under queue_lock as otherwise @blkg
@@ -166,29 +153,11 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
 
        return NULL;
 }
-
-/**
- * blkg_lookup - lookup blkg for the specified blkcg - q pair
- * @blkcg: blkcg of interest
- * @q: request_queue of interest
- *
- * Lookup blkg for the @blkcg - @q pair.  This function should be called
- * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
- * - see blk_queue_bypass_start() for details.
- */
-struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
-{
-       WARN_ON_ONCE(!rcu_read_lock_held());
-
-       if (unlikely(blk_queue_bypass(q)))
-               return NULL;
-       return __blkg_lookup(blkcg, q, false);
-}
-EXPORT_SYMBOL_GPL(blkg_lookup);
+EXPORT_SYMBOL_GPL(blkg_lookup_slowpath);
 
 /*
  * If @new_blkg is %NULL, this function tries to allocate a new one as
- * necessary using %GFP_ATOMIC.  @new_blkg is always consumed on return.
+ * necessary using %GFP_NOWAIT.  @new_blkg is always consumed on return.
  */
 static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
                                    struct request_queue *q,
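
blkg_lookup() and its hint-checking fast path are removed from this file; the renamed blkg_lookup_slowpath() keeps only the radix-tree fallback. The fast path presumably moves to blk-cgroup.h as inlines along these lines (a hedged reconstruction from the deleted code, not part of this hunk):

static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	/* fast path: per-blkcg lookup hint, valid under RCU */
	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());

	if (unlikely(blk_queue_bypass(q)))
		return NULL;
	return __blkg_lookup(blkcg, q, false);
}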
@@ -208,7 +177,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
        }
 
        wb_congested = wb_congested_get_create(&q->backing_dev_info,
-                                              blkcg->css.id, GFP_ATOMIC);
+                                              blkcg->css.id, GFP_NOWAIT);
        if (!wb_congested) {
                ret = -ENOMEM;
                goto err_put_css;
@@ -216,7 +185,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 
        /* allocate */
        if (!new_blkg) {
-               new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
+               new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT);
                if (unlikely(!new_blkg)) {
                        ret = -ENOMEM;
                        goto err_put_congested;
@@ -240,7 +209,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
                struct blkcg_policy *pol = blkcg_policy[i];
 
                if (blkg->pd[i] && pol->pd_init_fn)
-                       pol->pd_init_fn(blkg);
+                       pol->pd_init_fn(blkg->pd[i]);
        }
 
        /* insert */
@@ -254,7 +223,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
                        struct blkcg_policy *pol = blkcg_policy[i];
 
                        if (blkg->pd[i] && pol->pd_online_fn)
-                               pol->pd_online_fn(blkg);
+                               pol->pd_online_fn(blkg->pd[i]);
                }
        }
        blkg->online = true;
@@ -327,7 +296,6 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
                        return blkg;
        }
 }
-EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
@@ -345,7 +313,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
                struct blkcg_policy *pol = blkcg_policy[i];
 
                if (blkg->pd[i] && pol->pd_offline_fn)
-                       pol->pd_offline_fn(blkg);
+                       pol->pd_offline_fn(blkg->pd[i]);
        }
        blkg->online = false;
 
@@ -400,15 +368,6 @@ static void blkg_destroy_all(struct request_queue *q)
 void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
        struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
-       int i;
-
-       /* tell policies that this one is being freed */
-       for (i = 0; i < BLKCG_MAX_POLS; i++) {
-               struct blkcg_policy *pol = blkcg_policy[i];
-
-               if (blkg->pd[i] && pol->pd_exit_fn)
-                       pol->pd_exit_fn(blkg);
-       }
 
        /* release the blkcg and parent blkg refs this blkg has been holding */
        css_put(&blkg->blkcg->css);
@@ -475,9 +434,8 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
                for (i = 0; i < BLKCG_MAX_POLS; i++) {
                        struct blkcg_policy *pol = blkcg_policy[i];
 
-                       if (blkcg_policy_enabled(blkg->q, pol) &&
-                           pol->pd_reset_stats_fn)
-                               pol->pd_reset_stats_fn(blkg);
+                       if (blkg->pd[i] && pol->pd_reset_stats_fn)
+                               pol->pd_reset_stats_fn(blkg->pd[i]);
                }
        }
 
@@ -581,9 +539,10 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 
        for (i = 0; i < BLKG_RWSTAT_NR; i++)
                seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
-                          (unsigned long long)rwstat->cnt[i]);
+                          (unsigned long long)atomic64_read(&rwstat->aux_cnt[i]));
 
-       v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
+       v = atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_READ]) +
+               atomic64_read(&rwstat->aux_cnt[BLKG_RWSTAT_WRITE]);
        seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
        return v;
 }
@@ -622,29 +581,39 @@ EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
 
 /**
  * blkg_stat_recursive_sum - collect hierarchical blkg_stat
- * @pd: policy private data of interest
- * @off: offset to the blkg_stat in @pd
+ * @blkg: blkg of interest
+ * @pol: blkcg_policy which contains the blkg_stat
+ * @off: offset to the blkg_stat in blkg_policy_data or @blkg
+ *
+ * Collect the blkg_stat specified by @blkg, @pol and @off and all its
+ * online descendants and their aux counts.  The caller must be holding the
+ * queue lock for online tests.
  *
- * Collect the blkg_stat specified by @off from @pd and all its online
- * descendants and return the sum.  The caller must be holding the queue
- * lock for online tests.
+ * If @pol is NULL, blkg_stat is at @off bytes into @blkg; otherwise, it is
+ * at @off bytes into @blkg's blkg_policy_data of the policy.
  */
-u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
+u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
+                           struct blkcg_policy *pol, int off)
 {
-       struct blkcg_policy *pol = blkcg_policy[pd->plid];
        struct blkcg_gq *pos_blkg;
        struct cgroup_subsys_state *pos_css;
        u64 sum = 0;
 
-       lockdep_assert_held(pd->blkg->q->queue_lock);
+       lockdep_assert_held(blkg->q->queue_lock);
 
        rcu_read_lock();
-       blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
-               struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
-               struct blkg_stat *stat = (void *)pos_pd + off;
+       blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+               struct blkg_stat *stat;
 
-               if (pos_blkg->online)
-                       sum += blkg_stat_read(stat);
+               if (!pos_blkg->online)
+                       continue;
+
+               if (pol)
+                       stat = (void *)blkg_to_pd(pos_blkg, pol) + off;
+               else
+                       stat = (void *)blkg + off;
+
+               sum += blkg_stat_read(stat) + atomic64_read(&stat->aux_cnt);
        }
        rcu_read_unlock();
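
As the updated comment spells out, @pol now selects what @off indexes into. A usage sketch of both modes, assuming the caller holds the queue lock (blkcg_policy_foo and struct foo_group continue the hypothetical sketch above; delay_stat is an invented blkcg_gq member):

u64 t, d;

/* stat embedded in the policy's per-blkg data */
t = blkg_stat_recursive_sum(blkg, &blkcg_policy_foo,
			    offsetof(struct foo_group, time));

/* stat embedded directly in struct blkcg_gq: pass a NULL policy */
d = blkg_stat_recursive_sum(blkg, NULL,
			    offsetof(struct blkcg_gq, delay_stat));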
 
@@ -654,37 +623,45 @@ EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
 
 /**
  * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
- * @pd: policy private data of interest
- * @off: offset to the blkg_stat in @pd
+ * @blkg: blkg of interest
+ * @pol: blkcg_policy which contains the blkg_rwstat
+ * @off: offset to the blkg_rwstat in blkg_policy_data or @blkg
  *
- * Collect the blkg_rwstat specified by @off from @pd and all its online
- * descendants and return the sum.  The caller must be holding the queue
- * lock for online tests.
+ * Collect the blkg_rwstat specified by @blkg, @pol and @off and all its
+ * online descendants and their aux counts.  The caller must be holding the
+ * queue lock for online tests.
+ *
+ * If @pol is NULL, blkg_rwstat is at @off bytes into @blkg; otherwise, it
+ * is at @off bytes into @blkg's blkg_policy_data of the policy.
  */
-struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
-                                            int off)
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
+                                            struct blkcg_policy *pol, int off)
 {
-       struct blkcg_policy *pol = blkcg_policy[pd->plid];
        struct blkcg_gq *pos_blkg;
        struct cgroup_subsys_state *pos_css;
        struct blkg_rwstat sum = { };
        int i;
 
-       lockdep_assert_held(pd->blkg->q->queue_lock);
+       lockdep_assert_held(blkg->q->queue_lock);
 
        rcu_read_lock();
-       blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
-               struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
-               struct blkg_rwstat *rwstat = (void *)pos_pd + off;
-               struct blkg_rwstat tmp;
+       blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+               struct blkg_rwstat *rwstat, tmp;
 
                if (!pos_blkg->online)
                        continue;
 
+               if (pol)
+                       rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
+               else
+                       rwstat = (void *)pos_blkg + off;
+
                tmp = blkg_rwstat_read(rwstat);
 
                for (i = 0; i < BLKG_RWSTAT_NR; i++)
-                       sum.cnt[i] += tmp.cnt[i];
+                       atomic64_add(atomic64_read(&tmp.aux_cnt[i]) +
+                                    atomic64_read(&rwstat->aux_cnt[i]),
+                                    &sum.aux_cnt[i]);
        }
        rcu_read_unlock();
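
The rwstat variant follows the same indexing contract, and the result is now accumulated into aux_cnt[], so callers read the sum back with atomic64_read(). A sketch (stat_bytes is a hypothetical blkcg_gq member):

struct blkg_rwstat rws;
u64 rbytes, wbytes;

rws = blkg_rwstat_recursive_sum(blkg, NULL,
				offsetof(struct blkcg_gq, stat_bytes));
rbytes = atomic64_read(&rws.aux_cnt[BLKG_RWSTAT_READ]);
wbytes = atomic64_read(&rws.aux_cnt[BLKG_RWSTAT_WRITE]);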
 
@@ -822,18 +799,19 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
 static void blkcg_css_free(struct cgroup_subsys_state *css)
 {
        struct blkcg *blkcg = css_to_blkcg(css);
+       int i;
 
        mutex_lock(&blkcg_pol_mutex);
+
        list_del(&blkcg->all_blkcgs_node);
-       mutex_unlock(&blkcg_pol_mutex);
 
-       if (blkcg != &blkcg_root) {
-               int i;
+       for (i = 0; i < BLKCG_MAX_POLS; i++)
+               if (blkcg->cpd[i])
+                       blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
 
-               for (i = 0; i < BLKCG_MAX_POLS; i++)
-                       kfree(blkcg->pd[i]);
-               kfree(blkcg);
-       }
+       mutex_unlock(&blkcg_pol_mutex);
+
+       kfree(blkcg);
 }
 
 static struct cgroup_subsys_state *
@@ -847,13 +825,12 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 
        if (!parent_css) {
                blkcg = &blkcg_root;
-               goto done;
-       }
-
-       blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
-       if (!blkcg) {
-               ret = ERR_PTR(-ENOMEM);
-               goto free_blkcg;
+       } else {
+               blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
+               if (!blkcg) {
+                       ret = ERR_PTR(-ENOMEM);
+                       goto free_blkcg;
+               }
        }
 
        for (i = 0; i < BLKCG_MAX_POLS ; i++) {
@@ -866,23 +843,23 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
                 * check if the policy requires any specific per-cgroup
                 * data: if it does, allocate and initialize it.
                 */
-               if (!pol || !pol->cpd_size)
+               if (!pol || !pol->cpd_alloc_fn)
                        continue;
 
-               BUG_ON(blkcg->pd[i]);
-               cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+               cpd = pol->cpd_alloc_fn(GFP_KERNEL);
                if (!cpd) {
                        ret = ERR_PTR(-ENOMEM);
                        goto free_pd_blkcg;
                }
-               blkcg->pd[i] = cpd;
+               blkcg->cpd[i] = cpd;
+               cpd->blkcg = blkcg;
                cpd->plid = i;
-               pol->cpd_init_fn(blkcg);
+               if (pol->cpd_init_fn)
+                       pol->cpd_init_fn(cpd);
        }
 
-done:
        spin_lock_init(&blkcg->lock);
-       INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
+       INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT);
        INIT_HLIST_HEAD(&blkcg->blkg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
        INIT_LIST_HEAD(&blkcg->cgwb_list);
@@ -894,7 +871,8 @@ done:
 
 free_pd_blkcg:
        for (i--; i >= 0; i--)
-               kfree(blkcg->pd[i]);
+               if (blkcg->cpd[i])
+                       blkcg_policy[i]->cpd_free_fn(blkcg->cpd[i]);
 free_blkcg:
        kfree(blkcg);
        mutex_unlock(&blkcg_pol_mutex);
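
The per-cgroup (cpd) side mirrors the per-blkg conversion. Continuing the hypothetical "foo" policy, and assuming cpd_alloc_fn() takes only a gfp mask, as the GFP_KERNEL call sites here suggest:

struct foo_cgroup_data {
	struct blkcg_policy_data cpd;	/* recovered via container_of() */
	unsigned int weight;		/* example per-cgroup setting */
};

static struct blkcg_policy_data *foo_cpd_alloc(gfp_t gfp)
{
	struct foo_cgroup_data *fcd;

	fcd = kzalloc(sizeof(*fcd), gfp);
	return fcd ? &fcd->cpd : NULL;
}

static void foo_cpd_free(struct blkcg_policy_data *cpd)
{
	kfree(container_of(cpd, struct foo_cgroup_data, cpd));
}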
@@ -938,7 +916,7 @@ int blkcg_init_queue(struct request_queue *q)
                radix_tree_preload_end();
 
        if (IS_ERR(blkg)) {
-               kfree(new_blkg);
+               blkg_free(new_blkg);
                return PTR_ERR(blkg);
        }
 
@@ -1051,65 +1029,54 @@ EXPORT_SYMBOL_GPL(blkio_cgrp_subsys);
 int blkcg_activate_policy(struct request_queue *q,
                          const struct blkcg_policy *pol)
 {
-       LIST_HEAD(pds);
+       struct blkg_policy_data *pd_prealloc = NULL;
        struct blkcg_gq *blkg;
-       struct blkg_policy_data *pd, *nd;
-       int cnt = 0, ret;
+       int ret;
 
        if (blkcg_policy_enabled(q, pol))
                return 0;
 
-       /* count and allocate policy_data for all existing blkgs */
        blk_queue_bypass_start(q);
-       spin_lock_irq(q->queue_lock);
-       list_for_each_entry(blkg, &q->blkg_list, q_node)
-               cnt++;
-       spin_unlock_irq(q->queue_lock);
-
-       /* allocate per-blkg policy data for all existing blkgs */
-       while (cnt--) {
-               pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
-               if (!pd) {
+pd_prealloc:
+       if (!pd_prealloc) {
+               pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q->node);
+               if (!pd_prealloc) {
                        ret = -ENOMEM;
-                       goto out_free;
+                       goto out_bypass_end;
                }
-               list_add_tail(&pd->alloc_node, &pds);
        }
 
-       /*
-        * Install the allocated pds and cpds. With @q bypassing, no new blkg
-        * should have been created while the queue lock was dropped.
-        */
        spin_lock_irq(q->queue_lock);
 
        list_for_each_entry(blkg, &q->blkg_list, q_node) {
-               if (WARN_ON(list_empty(&pds))) {
-                       /* umm... this shouldn't happen, just abort */
-                       ret = -ENOMEM;
-                       goto out_unlock;
-               }
-               pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
-               list_del_init(&pd->alloc_node);
+               struct blkg_policy_data *pd;
 
-               /* grab blkcg lock too while installing @pd on @blkg */
-               spin_lock(&blkg->blkcg->lock);
+               if (blkg->pd[pol->plid])
+                       continue;
+
+               pd = pol->pd_alloc_fn(GFP_NOWAIT, q->node);
+               if (!pd)
+                       swap(pd, pd_prealloc);
+               if (!pd) {
+                       spin_unlock_irq(q->queue_lock);
+                       goto pd_prealloc;
+               }
 
                blkg->pd[pol->plid] = pd;
                pd->blkg = blkg;
                pd->plid = pol->plid;
-               pol->pd_init_fn(blkg);
-
-               spin_unlock(&blkg->blkcg->lock);
+               if (pol->pd_init_fn)
+                       pol->pd_init_fn(pd);
        }
 
        __set_bit(pol->plid, q->blkcg_pols);
        ret = 0;
-out_unlock:
+
        spin_unlock_irq(q->queue_lock);
-out_free:
+out_bypass_end:
        blk_queue_bypass_end(q);
-       list_for_each_entry_safe(pd, nd, &pds, alloc_node)
-               kfree(pd);
+       if (pd_prealloc)
+               pol->pd_free_fn(pd_prealloc);
        return ret;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
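
The rewrite drops the count-then-allocate pass in favor of opportunistic GFP_NOWAIT allocation under queue_lock, with a single GFP_KERNEL-preallocated pd as fallback; when both are consumed it unlocks and jumps back to pd_prealloc to refill. The caller-facing contract is unchanged: a policy still enables and disables itself per queue, e.g. (hypothetical hooks):

static int foo_init_queue(struct request_queue *q)
{
	return blkcg_activate_policy(q, &blkcg_policy_foo);
}

static void foo_exit_queue(struct request_queue *q)
{
	blkcg_deactivate_policy(q, &blkcg_policy_foo);
}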
@@ -1139,13 +1106,12 @@ void blkcg_deactivate_policy(struct request_queue *q,
                /* grab blkcg lock too while removing @pd from @blkg */
                spin_lock(&blkg->blkcg->lock);
 
-               if (pol->pd_offline_fn)
-                       pol->pd_offline_fn(blkg);
-               if (pol->pd_exit_fn)
-                       pol->pd_exit_fn(blkg);
-
-               kfree(blkg->pd[pol->plid]);
-               blkg->pd[pol->plid] = NULL;
+               if (blkg->pd[pol->plid]) {
+                       if (pol->pd_offline_fn)
+                               pol->pd_offline_fn(blkg->pd[pol->plid]);
+                       pol->pd_free_fn(blkg->pd[pol->plid]);
+                       blkg->pd[pol->plid] = NULL;
+               }
 
                spin_unlock(&blkg->blkcg->lock);
        }
@@ -1167,9 +1133,6 @@ int blkcg_policy_register(struct blkcg_policy *pol)
        struct blkcg *blkcg;
        int i, ret;
 
-       if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
-               return -EINVAL;
-
        mutex_lock(&blkcg_pol_register_mutex);
        mutex_lock(&blkcg_pol_mutex);
 
@@ -1186,19 +1149,20 @@ int blkcg_policy_register(struct blkcg_policy *pol)
        blkcg_policy[pol->plid] = pol;
 
        /* allocate and install cpd's */
-       if (pol->cpd_size) {
+       if (pol->cpd_alloc_fn) {
                list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
                        struct blkcg_policy_data *cpd;
 
-                       cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+                       cpd = pol->cpd_alloc_fn(GFP_KERNEL);
                        if (!cpd) {
                                mutex_unlock(&blkcg_pol_mutex);
                                goto err_free_cpds;
                        }
 
-                       blkcg->pd[pol->plid] = cpd;
+                       blkcg->cpd[pol->plid] = cpd;
+                       cpd->blkcg = blkcg;
                        cpd->plid = pol->plid;
-                       pol->cpd_init_fn(blkcg);
+                       pol->cpd_init_fn(cpd);
                }
        }
 
@@ -1212,10 +1176,12 @@ int blkcg_policy_register(struct blkcg_policy *pol)
        return 0;
 
 err_free_cpds:
-       if (pol->cpd_size) {
+       if (pol->cpd_alloc_fn) {
                list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
-                       kfree(blkcg->pd[pol->plid]);
-                       blkcg->pd[pol->plid] = NULL;
+                       if (blkcg->cpd[pol->plid]) {
+                               pol->cpd_free_fn(blkcg->cpd[pol->plid]);
+                               blkcg->cpd[pol->plid] = NULL;
+                       }
                }
        }
        blkcg_policy[pol->plid] = NULL;
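
With pd_size/cpd_size gone, registration hinges on the alloc/free hooks; the init hooks stay optional, as the new `if (pol->cpd_init_fn)` / `if (pol->pd_init_fn)` checks show. Tying the hypothetical "foo" sketches together:

static struct blkcg_policy blkcg_policy_foo = {
	/* cpd_init_fn/pd_init_fn omitted: they are optional */
	.cpd_alloc_fn	= foo_cpd_alloc,
	.cpd_free_fn	= foo_cpd_free,
	.pd_alloc_fn	= foo_pd_alloc,
	.pd_free_fn	= foo_pd_free,
};

static int __init foo_init(void)
{
	return blkcg_policy_register(&blkcg_policy_foo);
}

static void __exit foo_exit(void)
{
	blkcg_policy_unregister(&blkcg_policy_foo);
}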
@@ -1248,10 +1214,12 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
        /* remove cpds and unregister */
        mutex_lock(&blkcg_pol_mutex);
 
-       if (pol->cpd_size) {
+       if (pol->cpd_alloc_fn) {
                list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
-                       kfree(blkcg->pd[pol->plid]);
-                       blkcg->pd[pol->plid] = NULL;
+                       if (blkcg->cpd[pol->plid]) {
+                               pol->cpd_free_fn(blkcg->cpd[pol->plid]);
+                               blkcg->cpd[pol->plid] = NULL;
+                       }
                }
        }
        blkcg_policy[pol->plid] = NULL;