Merge branch 'pm-cpufreq'
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>
Sat, 1 Oct 2016 23:42:45 +0000 (01:42 +0200)
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>
Sat, 1 Oct 2016 23:42:45 +0000 (01:42 +0200)
* pm-cpufreq: (24 commits)
  cpufreq: st: add missing \n to end of dev_err message
  cpufreq: kirkwood: add missing \n to end of dev_err messages
  cpufreq: CPPC: Avoid overflow when calculating desired_perf
  cpufreq: ti: Use generic platdev driver
  cpufreq: intel_pstate: Add io_boost trace
  cpufreq: intel_pstate: Use IOWAIT flag in Atom algorithm
  cpufreq: schedutil: Add iowait boosting
  cpufreq / sched: SCHED_CPUFREQ_IOWAIT flag to indicate iowait condition
  cpufreq: CPPC: Force reporting values in KHz to fix user space interface
  cpufreq: create link to policy only for registered CPUs
  intel_pstate: constify local structures
  cpufreq: dt: Support governor tunables per policy
  cpufreq: dt: Update kconfig description
  cpufreq: dt: Remove unused code
  MAINTAINERS: Add Documentation/cpu-freq/
  cpufreq: dt: Add support for r8a7792
  cpufreq / sched: ignore SMT when determining max cpu capacity
  cpufreq: Drop unnecessary check from cpufreq_policy_alloc()
  ARM: multi_v7_defconfig: Don't attempt to enable schedutil governor as module
  ARM: exynos_defconfig: Don't attempt to enable schedutil governor as module
  ...

22 files changed:
MAINTAINERS
arch/arm/configs/exynos_defconfig
arch/arm/configs/multi_v7_defconfig
drivers/cpufreq/Kconfig
drivers/cpufreq/cppc_cpufreq.c
drivers/cpufreq/cpufreq-dt-platdev.c
drivers/cpufreq/cpufreq-dt.c
drivers/cpufreq/cpufreq-dt.h [new file with mode: 0644]
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_governor.c
drivers/cpufreq/intel_pstate.c
drivers/cpufreq/kirkwood-cpufreq.c
drivers/cpufreq/scpi-cpufreq.c
drivers/cpufreq/sti-cpufreq.c
include/linux/sched.h
include/trace/events/power.h
kernel/sched/cpufreq.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/deadline.c
kernel/sched/fair.c
kernel/sched/rt.c
kernel/sched/sched.h

index 01bff8e..e750d4a 100644 (file)
@@ -3282,6 +3282,7 @@ L:        linux-pm@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 T:     git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
+F:     Documentation/cpu-freq/
 F:     drivers/cpufreq/
 F:     include/linux/cpufreq.h
 
index 01986de..36cc7cc 100644 (file)
@@ -28,7 +28,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_USERSPACE=m
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_CPUFREQ_DT=y
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_EXYNOS_CPUIDLE=y
index ea3566f..5845910 100644 (file)
@@ -135,7 +135,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_USERSPACE=m
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_QORIQ_CPUFREQ=y
 CONFIG_CPU_IDLE=y
 CONFIG_ARM_CPUIDLE=y
index 74919aa..d8b164a 100644 (file)
@@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
          If in doubt, say N.
 
 config CPU_FREQ_GOV_SCHEDUTIL
-       tristate "'schedutil' cpufreq policy governor"
+       bool "'schedutil' cpufreq policy governor"
        depends on CPU_FREQ && SMP
        select CPU_FREQ_GOV_ATTR_SET
        select IRQ_WORK
@@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL
          frequency tipping point is at utilization/capacity equal to 80% in
          both cases.
 
-         To compile this driver as a module, choose M here: the module will
-         be called cpufreq_schedutil.
-
          If in doubt, say N.
 
 comment "CPU frequency scaling drivers"
@@ -225,7 +222,7 @@ config CPUFREQ_DT
        help
          This adds a generic DT based cpufreq driver for frequency management.
          It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
-         systems which share clock and voltage across all CPUs.
+         systems.
 
          If in doubt, say N.
 
index 8882b8e..99db422 100644 (file)
 #include <linux/delay.h>
 #include <linux/cpu.h>
 #include <linux/cpufreq.h>
+#include <linux/dmi.h>
 #include <linux/vmalloc.h>
 
+#include <asm/unaligned.h>
+
 #include <acpi/cppc_acpi.h>
 
+/* Minimum struct length needed for the DMI processor entry we want */
+#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48
+
+/* Offest in the DMI processor structure for the max frequency */
+#define DMI_PROCESSOR_MAX_SPEED  0x14
+
 /*
  * These structs contain information parsed from per CPU
  * ACPI _CPC structures.
  */
 static struct cpudata **all_cpu_data;
 
+/* Capture the max KHz from DMI */
+static u64 cppc_dmi_max_khz;
+
+/* Callback function used to retrieve the max frequency from DMI */
+static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
+{
+       const u8 *dmi_data = (const u8 *)dm;
+       u16 *mhz = (u16 *)private;
+
+       if (dm->type == DMI_ENTRY_PROCESSOR &&
+           dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) {
+               u16 val = (u16)get_unaligned((const u16 *)
+                               (dmi_data + DMI_PROCESSOR_MAX_SPEED));
+               *mhz = val > *mhz ? val : *mhz;
+       }
+}
+
+/* Look up the max frequency in DMI */
+static u64 cppc_get_dmi_max_khz(void)
+{
+       u16 mhz = 0;
+
+       dmi_walk(cppc_find_dmi_mhz, &mhz);
+
+       /*
+        * Real stupid fallback value, just in case there is no
+        * actual value set.
+        */
+       mhz = mhz ? mhz : 1;
+
+       return (1000 * mhz);
+}
+
 static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
                unsigned int target_freq,
                unsigned int relation)
@@ -42,7 +84,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
 
        cpu = all_cpu_data[policy->cpu];
 
-       cpu->perf_ctrls.desired_perf = target_freq;
+       cpu->perf_ctrls.desired_perf = (u64)target_freq * policy->max / cppc_dmi_max_khz;
        freqs.old = policy->cur;
        freqs.new = target_freq;
 
@@ -94,8 +136,10 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
                return ret;
        }
 
-       policy->min = cpu->perf_caps.lowest_perf;
-       policy->max = cpu->perf_caps.highest_perf;
+       cppc_dmi_max_khz = cppc_get_dmi_max_khz();
+
+       policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf;
+       policy->max = cppc_dmi_max_khz;
        policy->cpuinfo.min_freq = policy->min;
        policy->cpuinfo.max_freq = policy->max;
        policy->shared_type = cpu->shared_type;
@@ -112,7 +156,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
        cpu->cur_policy = policy;
 
        /* Set policy->cur to max now. The governors will adjust later. */
-       policy->cur = cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
+       policy->cur = cppc_dmi_max_khz;
+       cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
 
        ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls);
        if (ret)
index 2ee40fd..7126762 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 
+#include "cpufreq-dt.h"
+
 static const struct of_device_id machines[] __initconst = {
        { .compatible = "allwinner,sun4i-a10", },
        { .compatible = "allwinner,sun5i-a10s", },
@@ -40,6 +42,7 @@ static const struct of_device_id machines[] __initconst = {
        { .compatible = "samsung,exynos5250", },
 #ifndef CONFIG_BL_SWITCHER
        { .compatible = "samsung,exynos5420", },
+       { .compatible = "samsung,exynos5433", },
        { .compatible = "samsung,exynos5800", },
 #endif
 
@@ -51,6 +54,7 @@ static const struct of_device_id machines[] __initconst = {
        { .compatible = "renesas,r8a7779", },
        { .compatible = "renesas,r8a7790", },
        { .compatible = "renesas,r8a7791", },
+       { .compatible = "renesas,r8a7792", },
        { .compatible = "renesas,r8a7793", },
        { .compatible = "renesas,r8a7794", },
        { .compatible = "renesas,sh73a0", },
@@ -68,6 +72,8 @@ static const struct of_device_id machines[] __initconst = {
 
        { .compatible = "sigma,tango4" },
 
+       { .compatible = "ti,am33xx", },
+       { .compatible = "ti,dra7", },
        { .compatible = "ti,omap2", },
        { .compatible = "ti,omap3", },
        { .compatible = "ti,omap4", },
@@ -91,7 +97,8 @@ static int __init cpufreq_dt_platdev_init(void)
        if (!match)
                return -ENODEV;
 
-       return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1,
-                                                              NULL, 0));
+       return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt",
+                              -1, match->data,
+                              sizeof(struct cpufreq_dt_platform_data)));
 }
 device_initcall(cpufreq_dt_platdev_init);
index 3957de8..5c07ae0 100644 (file)
@@ -25,6 +25,8 @@
 #include <linux/slab.h>
 #include <linux/thermal.h>
 
+#include "cpufreq-dt.h"
+
 struct private_data {
        struct device *cpu_dev;
        struct thermal_cooling_device *cdev;
@@ -353,6 +355,7 @@ static struct cpufreq_driver dt_cpufreq_driver = {
 
 static int dt_cpufreq_probe(struct platform_device *pdev)
 {
+       struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev);
        int ret;
 
        /*
@@ -366,7 +369,8 @@ static int dt_cpufreq_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev);
+       if (data && data->have_governor_per_policy)
+               dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY;
 
        ret = cpufreq_register_driver(&dt_cpufreq_driver);
        if (ret)
diff --git a/drivers/cpufreq/cpufreq-dt.h b/drivers/cpufreq/cpufreq-dt.h
new file mode 100644 (file)
index 0000000..54d774e
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * Copyright (C) 2016 Linaro
+ * Viresh Kumar <viresh.kumar@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CPUFREQ_DT_H__
+#define __CPUFREQ_DT_H__
+
+#include <linux/types.h>
+
+struct cpufreq_dt_platform_data {
+       bool have_governor_per_policy;
+};
+
+#endif /* __CPUFREQ_DT_H__ */
index 3dd4884..3a64136 100644 (file)
@@ -916,58 +916,18 @@ static struct kobj_type ktype_cpufreq = {
        .release        = cpufreq_sysfs_release,
 };
 
-static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
+static int add_cpu_dev_symlink(struct cpufreq_policy *policy,
+                              struct device *dev)
 {
-       struct device *cpu_dev;
-
-       pr_debug("%s: Adding symlink for CPU: %u\n", __func__, cpu);
-
-       if (!policy)
-               return 0;
-
-       cpu_dev = get_cpu_device(cpu);
-       if (WARN_ON(!cpu_dev))
-               return 0;
-
-       return sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq");
-}
-
-static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
-{
-       struct device *cpu_dev;
-
-       pr_debug("%s: Removing symlink for CPU: %u\n", __func__, cpu);
-
-       cpu_dev = get_cpu_device(cpu);
-       if (WARN_ON(!cpu_dev))
-               return;
-
-       sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
+       dev_dbg(dev, "%s: Adding symlink\n", __func__);
+       return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
 }
 
-/* Add/remove symlinks for all related CPUs */
-static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy)
+static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
+                                  struct device *dev)
 {
-       unsigned int j;
-       int ret = 0;
-
-       /* Some related CPUs might not be present (physically hotplugged) */
-       for_each_cpu(j, policy->real_cpus) {
-               ret = add_cpu_dev_symlink(policy, j);
-               if (ret)
-                       break;
-       }
-
-       return ret;
-}
-
-static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy)
-{
-       unsigned int j;
-
-       /* Some related CPUs might not be present (physically hotplugged) */
-       for_each_cpu(j, policy->real_cpus)
-               remove_cpu_dev_symlink(policy, j);
+       dev_dbg(dev, "%s: Removing symlink\n", __func__);
+       sysfs_remove_link(&dev->kobj, "cpufreq");
 }
 
 static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
@@ -999,7 +959,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
                        return ret;
        }
 
-       return cpufreq_add_dev_symlink(policy);
+       return 0;
 }
 
 __weak struct cpufreq_governor *cpufreq_default_governor(void)
@@ -1073,13 +1033,9 @@ static void handle_update(struct work_struct *work)
 
 static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
 {
-       struct device *dev = get_cpu_device(cpu);
        struct cpufreq_policy *policy;
        int ret;
 
-       if (WARN_ON(!dev))
-               return NULL;
-
        policy = kzalloc(sizeof(*policy), GFP_KERNEL);
        if (!policy)
                return NULL;
@@ -1133,7 +1089,6 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify)
 
        down_write(&policy->rwsem);
        cpufreq_stats_free_table(policy);
-       cpufreq_remove_dev_symlink(policy);
        kobj = &policy->kobj;
        cmp = &policy->kobj_unregister;
        up_write(&policy->rwsem);
@@ -1215,8 +1170,8 @@ static int cpufreq_online(unsigned int cpu)
        if (new_policy) {
                /* related_cpus should at least include policy->cpus. */
                cpumask_copy(policy->related_cpus, policy->cpus);
-               /* Remember CPUs present at the policy creation time. */
-               cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask);
+               /* Clear mask of registered CPUs */
+               cpumask_clear(policy->real_cpus);
        }
 
        /*
@@ -1331,6 +1286,8 @@ out_free_policy:
        return ret;
 }
 
+static void cpufreq_offline(unsigned int cpu);
+
 /**
  * cpufreq_add_dev - the cpufreq interface for a CPU device.
  * @dev: CPU device.
@@ -1340,22 +1297,28 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
 {
        struct cpufreq_policy *policy;
        unsigned cpu = dev->id;
+       int ret;
 
        dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
 
-       if (cpu_online(cpu))
-               return cpufreq_online(cpu);
+       if (cpu_online(cpu)) {
+               ret = cpufreq_online(cpu);
+               if (ret)
+                       return ret;
+       }
 
-       /*
-        * A hotplug notifier will follow and we will handle it as CPU online
-        * then.  For now, just create the sysfs link, unless there is no policy
-        * or the link is already present.
-        */
+       /* Create sysfs link on CPU registration */
        policy = per_cpu(cpufreq_cpu_data, cpu);
        if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
                return 0;
 
-       return add_cpu_dev_symlink(policy, cpu);
+       ret = add_cpu_dev_symlink(policy, dev);
+       if (ret) {
+               cpumask_clear_cpu(cpu, policy->real_cpus);
+               cpufreq_offline(cpu);
+       }
+
+       return ret;
 }
 
 static void cpufreq_offline(unsigned int cpu)
@@ -1436,7 +1399,7 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
                cpufreq_offline(cpu);
 
        cpumask_clear_cpu(cpu, policy->real_cpus);
-       remove_cpu_dev_symlink(policy, cpu);
+       remove_cpu_dev_symlink(policy, dev);
 
        if (cpumask_empty(policy->real_cpus))
                cpufreq_policy_free(policy, true);
index e415349..642dd0f 100644 (file)
@@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work *irq_work)
 }
 
 static void dbs_update_util_handler(struct update_util_data *data, u64 time,
-                                   unsigned long util, unsigned long max)
+                                   unsigned int flags)
 {
        struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
        struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
index be9eade..806f203 100644 (file)
@@ -181,6 +181,8 @@ struct _pid {
  * @cpu:               CPU number for this instance data
  * @update_util:       CPUFreq utility callback information
  * @update_util_set:   CPUFreq utility callback is set
+ * @iowait_boost:      iowait-related boost fraction
+ * @last_update:       Time of the last update.
  * @pstate:            Stores P state limits for this CPU
  * @vid:               Stores VID limits for this CPU
  * @pid:               Stores PID parameters for this CPU
@@ -206,6 +208,7 @@ struct cpudata {
        struct vid_data vid;
        struct _pid pid;
 
+       u64     last_update;
        u64     last_sample_time;
        u64     prev_aperf;
        u64     prev_mperf;
@@ -216,6 +219,7 @@ struct cpudata {
        struct acpi_processor_performance acpi_perf_data;
        bool valid_pss_table;
 #endif
+       unsigned int iowait_boost;
 };
 
 static struct cpudata **all_cpu_data;
@@ -229,6 +233,7 @@ static struct cpudata **all_cpu_data;
  * @p_gain_pct:                PID proportional gain
  * @i_gain_pct:                PID integral gain
  * @d_gain_pct:                PID derivative gain
+ * @boost_iowait:      Whether or not to use iowait boosting.
  *
  * Stores per CPU model static PID configuration data.
  */
@@ -240,6 +245,7 @@ struct pstate_adjust_policy {
        int p_gain_pct;
        int d_gain_pct;
        int i_gain_pct;
+       bool boost_iowait;
 };
 
 /**
@@ -1029,7 +1035,7 @@ static struct cpu_defaults core_params = {
        },
 };
 
-static struct cpu_defaults silvermont_params = {
+static const struct cpu_defaults silvermont_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
@@ -1037,6 +1043,7 @@ static struct cpu_defaults silvermont_params = {
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
+               .boost_iowait = true,
        },
        .funcs = {
                .get_max = atom_get_max_pstate,
@@ -1050,7 +1057,7 @@ static struct cpu_defaults silvermont_params = {
        },
 };
 
-static struct cpu_defaults airmont_params = {
+static const struct cpu_defaults airmont_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
@@ -1058,6 +1065,7 @@ static struct cpu_defaults airmont_params = {
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
+               .boost_iowait = true,
        },
        .funcs = {
                .get_max = atom_get_max_pstate,
@@ -1071,7 +1079,7 @@ static struct cpu_defaults airmont_params = {
        },
 };
 
-static struct cpu_defaults knl_params = {
+static const struct cpu_defaults knl_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
@@ -1091,7 +1099,7 @@ static struct cpu_defaults knl_params = {
        },
 };
 
-static struct cpu_defaults bxt_params = {
+static const struct cpu_defaults bxt_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
@@ -1099,6 +1107,7 @@ static struct cpu_defaults bxt_params = {
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
+               .boost_iowait = true,
        },
        .funcs = {
                .get_max = core_get_max_pstate,
@@ -1222,36 +1231,18 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu)
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 {
        struct sample *sample = &cpu->sample;
-       u64 cummulative_iowait, delta_iowait_us;
-       u64 delta_iowait_mperf;
-       u64 mperf, now;
-       int32_t cpu_load;
+       int32_t busy_frac, boost;
 
-       cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
+       busy_frac = div_fp(sample->mperf, sample->tsc);
 
-       /*
-        * Convert iowait time into number of IO cycles spent at max_freq.
-        * IO is considered as busy only for the cpu_load algorithm. For
-        * performance this is not needed since we always try to reach the
-        * maximum P-State, so we are already boosting the IOs.
-        */
-       delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
-       delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
-               cpu->pstate.max_pstate, MSEC_PER_SEC);
+       boost = cpu->iowait_boost;
+       cpu->iowait_boost >>= 1;
 
-       mperf = cpu->sample.mperf + delta_iowait_mperf;
-       cpu->prev_cummulative_iowait = cummulative_iowait;
+       if (busy_frac < boost)
+               busy_frac = boost;
 
-       /*
-        * The load can be estimated as the ratio of the mperf counter
-        * running at a constant frequency during active periods
-        * (C0) and the time stamp counter running at the same frequency
-        * also during C-states.
-        */
-       cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
-       cpu->sample.busy_scaled = cpu_load;
-
-       return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
+       sample->busy_scaled = busy_frac * 100;
+       return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
 }
 
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
@@ -1325,15 +1316,29 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
                sample->mperf,
                sample->aperf,
                sample->tsc,
-               get_avg_frequency(cpu));
+               get_avg_frequency(cpu),
+               fp_toint(cpu->iowait_boost * 100));
 }
 
 static void intel_pstate_update_util(struct update_util_data *data, u64 time,
-                                    unsigned long util, unsigned long max)
+                                    unsigned int flags)
 {
        struct cpudata *cpu = container_of(data, struct cpudata, update_util);
-       u64 delta_ns = time - cpu->sample.time;
+       u64 delta_ns;
+
+       if (pid_params.boost_iowait) {
+               if (flags & SCHED_CPUFREQ_IOWAIT) {
+                       cpu->iowait_boost = int_tofp(1);
+               } else if (cpu->iowait_boost) {
+                       /* Clear iowait_boost if the CPU may have been idle. */
+                       delta_ns = time - cpu->last_update;
+                       if (delta_ns > TICK_NSEC)
+                               cpu->iowait_boost = 0;
+               }
+               cpu->last_update = time;
+       }
 
+       delta_ns = time - cpu->sample.time;
        if ((s64)delta_ns >= pid_params.sample_rate_ns) {
                bool sample_taken = intel_pstate_sample(cpu, time);
 
index be42f10..1b9bcd7 100644 (file)
@@ -123,7 +123,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 
        priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk");
        if (IS_ERR(priv.cpu_clk)) {
-               dev_err(priv.dev, "Unable to get cpuclk");
+               dev_err(priv.dev, "Unable to get cpuclk\n");
                return PTR_ERR(priv.cpu_clk);
        }
 
@@ -132,7 +132,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 
        priv.ddr_clk = of_clk_get_by_name(np, "ddrclk");
        if (IS_ERR(priv.ddr_clk)) {
-               dev_err(priv.dev, "Unable to get ddrclk");
+               dev_err(priv.dev, "Unable to get ddrclk\n");
                err = PTR_ERR(priv.ddr_clk);
                goto out_cpu;
        }
@@ -142,7 +142,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
 
        priv.powersave_clk = of_clk_get_by_name(np, "powersave");
        if (IS_ERR(priv.powersave_clk)) {
-               dev_err(priv.dev, "Unable to get powersave");
+               dev_err(priv.dev, "Unable to get powersave\n");
                err = PTR_ERR(priv.powersave_clk);
                goto out_ddr;
        }
@@ -155,7 +155,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
        if (!err)
                return 0;
 
-       dev_err(priv.dev, "Failed to register cpufreq driver");
+       dev_err(priv.dev, "Failed to register cpufreq driver\n");
 
        clk_disable_unprepare(priv.powersave_clk);
 out_ddr:
index e8a7bf5..ea7a4e1 100644 (file)
@@ -105,7 +105,6 @@ static int scpi_cpufreq_remove(struct platform_device *pdev)
 static struct platform_driver scpi_cpufreq_platdrv = {
        .driver = {
                .name   = "scpi-cpufreq",
-               .owner  = THIS_MODULE,
        },
        .probe          = scpi_cpufreq_probe,
        .remove         = scpi_cpufreq_remove,
index 0404203..b366e6d 100644 (file)
@@ -163,7 +163,7 @@ static int sti_cpufreq_set_opp_info(void)
 
        reg_fields = sti_cpufreq_match();
        if (!reg_fields) {
-               dev_err(dev, "This SoC doesn't support voltage scaling");
+               dev_err(dev, "This SoC doesn't support voltage scaling\n");
                return -ENODEV;
        }
 
index 62c68e5..98fe95f 100644 (file)
@@ -3469,15 +3469,20 @@ static inline unsigned long rlimit_max(unsigned int limit)
        return task_rlimit_max(current, limit);
 }
 
+#define SCHED_CPUFREQ_RT       (1U << 0)
+#define SCHED_CPUFREQ_DL       (1U << 1)
+#define SCHED_CPUFREQ_IOWAIT   (1U << 2)
+
+#define SCHED_CPUFREQ_RT_DL    (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
+
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
-       void (*func)(struct update_util_data *data,
-                    u64 time, unsigned long util, unsigned long max);
+       void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
 };
 
 void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
-                       void (*func)(struct update_util_data *data, u64 time,
-                                    unsigned long util, unsigned long max));
+                       void (*func)(struct update_util_data *data, u64 time,
+                                   unsigned int flags));
 void cpufreq_remove_update_util_hook(int cpu);
 #endif /* CONFIG_CPU_FREQ */
 
index 19e5030..54e3aad 100644 (file)
@@ -69,7 +69,8 @@ TRACE_EVENT(pstate_sample,
                u64 mperf,
                u64 aperf,
                u64 tsc,
-               u32 freq
+               u32 freq,
+               u32 io_boost
                ),
 
        TP_ARGS(core_busy,
@@ -79,7 +80,8 @@ TRACE_EVENT(pstate_sample,
                mperf,
                aperf,
                tsc,
-               freq
+               freq,
+               io_boost
                ),
 
        TP_STRUCT__entry(
@@ -91,6 +93,7 @@ TRACE_EVENT(pstate_sample,
                __field(u64, aperf)
                __field(u64, tsc)
                __field(u32, freq)
+               __field(u32, io_boost)
                ),
 
        TP_fast_assign(
@@ -102,9 +105,10 @@ TRACE_EVENT(pstate_sample,
                __entry->aperf = aperf;
                __entry->tsc = tsc;
                __entry->freq = freq;
+               __entry->io_boost = io_boost;
                ),
 
-       TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ",
+       TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu",
                (unsigned long)__entry->core_busy,
                (unsigned long)__entry->scaled_busy,
                (unsigned long)__entry->from,
@@ -112,7 +116,8 @@ TRACE_EVENT(pstate_sample,
                (unsigned long long)__entry->mperf,
                (unsigned long long)__entry->aperf,
                (unsigned long long)__entry->tsc,
-               (unsigned long)__entry->freq
+               (unsigned long)__entry->freq,
+               (unsigned long)__entry->io_boost
                )
 
 );
index 1141954..dbc5144 100644 (file)
@@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
  */
 void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
                        void (*func)(struct update_util_data *data, u64 time,
-                                    unsigned long util, unsigned long max))
+                                    unsigned int flags))
 {
        if (WARN_ON(!data || !func))
                return;
index a84641b..69e0689 100644 (file)
@@ -12,7 +12,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/cpufreq.h>
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <trace/events/power.h>
 
@@ -48,11 +47,14 @@ struct sugov_cpu {
        struct sugov_policy *sg_policy;
 
        unsigned int cached_raw_freq;
+       unsigned long iowait_boost;
+       unsigned long iowait_boost_max;
+       u64 last_update;
 
        /* The fields below are only needed when sharing a policy. */
        unsigned long util;
        unsigned long max;
-       u64 last_update;
+       unsigned int flags;
 };
 
 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -144,24 +146,75 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
        return cpufreq_driver_resolve_freq(policy, freq);
 }
 
+static void sugov_get_util(unsigned long *util, unsigned long *max)
+{
+       struct rq *rq = this_rq();
+       unsigned long cfs_max;
+
+       cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+
+       *util = min(rq->cfs.avg.util_avg, cfs_max);
+       *max = cfs_max;
+}
+
+static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
+                                  unsigned int flags)
+{
+       if (flags & SCHED_CPUFREQ_IOWAIT) {
+               sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+       } else if (sg_cpu->iowait_boost) {
+               s64 delta_ns = time - sg_cpu->last_update;
+
+               /* Clear iowait_boost if the CPU apprears to have been idle. */
+               if (delta_ns > TICK_NSEC)
+                       sg_cpu->iowait_boost = 0;
+       }
+}
+
+static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
+                              unsigned long *max)
+{
+       unsigned long boost_util = sg_cpu->iowait_boost;
+       unsigned long boost_max = sg_cpu->iowait_boost_max;
+
+       if (!boost_util)
+               return;
+
+       if (*util * boost_max < *max * boost_util) {
+               *util = boost_util;
+               *max = boost_max;
+       }
+       sg_cpu->iowait_boost >>= 1;
+}
+
 static void sugov_update_single(struct update_util_data *hook, u64 time,
-                               unsigned long util, unsigned long max)
+                               unsigned int flags)
 {
        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
        struct cpufreq_policy *policy = sg_policy->policy;
+       unsigned long util, max;
        unsigned int next_f;
 
+       sugov_set_iowait_boost(sg_cpu, time, flags);
+       sg_cpu->last_update = time;
+
        if (!sugov_should_update_freq(sg_policy, time))
                return;
 
-       next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
-                       get_next_freq(sg_cpu, util, max);
+       if (flags & SCHED_CPUFREQ_RT_DL) {
+               next_f = policy->cpuinfo.max_freq;
+       } else {
+               sugov_get_util(&util, &max);
+               sugov_iowait_boost(sg_cpu, &util, &max);
+               next_f = get_next_freq(sg_cpu, util, max);
+       }
        sugov_update_commit(sg_policy, time, next_f);
 }
 
 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
-                                          unsigned long util, unsigned long max)
+                                          unsigned long util, unsigned long max,
+                                          unsigned int flags)
 {
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
        struct cpufreq_policy *policy = sg_policy->policy;
@@ -169,9 +222,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
        u64 last_freq_update_time = sg_policy->last_freq_update_time;
        unsigned int j;
 
-       if (util == ULONG_MAX)
+       if (flags & SCHED_CPUFREQ_RT_DL)
                return max_f;
 
+       sugov_iowait_boost(sg_cpu, &util, &max);
+
        for_each_cpu(j, policy->cpus) {
                struct sugov_cpu *j_sg_cpu;
                unsigned long j_util, j_max;
@@ -186,41 +241,50 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
                 * frequency update and the time elapsed between the last update
                 * of the CPU utilization and the last frequency update is long
                 * enough, don't take the CPU into account as it probably is
-                * idle now.
+                * idle now (and clear iowait_boost for it).
                 */
                delta_ns = last_freq_update_time - j_sg_cpu->last_update;
-               if (delta_ns > TICK_NSEC)
+               if (delta_ns > TICK_NSEC) {
+                       j_sg_cpu->iowait_boost = 0;
                        continue;
-
-               j_util = j_sg_cpu->util;
-               if (j_util == ULONG_MAX)
+               }
+               if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
                        return max_f;
 
+               j_util = j_sg_cpu->util;
                j_max = j_sg_cpu->max;
                if (j_util * max > j_max * util) {
                        util = j_util;
                        max = j_max;
                }
+
+               sugov_iowait_boost(j_sg_cpu, &util, &max);
        }
 
        return get_next_freq(sg_cpu, util, max);
 }
 
 static void sugov_update_shared(struct update_util_data *hook, u64 time,
-                               unsigned long util, unsigned long max)
+                               unsigned int flags)
 {
        struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
        struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+       unsigned long util, max;
        unsigned int next_f;
 
+       sugov_get_util(&util, &max);
+
        raw_spin_lock(&sg_policy->update_lock);
 
        sg_cpu->util = util;
        sg_cpu->max = max;
+       sg_cpu->flags = flags;
+
+       sugov_set_iowait_boost(sg_cpu, time, flags);
        sg_cpu->last_update = time;
 
        if (sugov_should_update_freq(sg_policy, time)) {
-               next_f = sugov_next_freq_shared(sg_cpu, util, max);
+               next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
                sugov_update_commit(sg_policy, time, next_f);
        }
 
@@ -444,10 +508,13 @@ static int sugov_start(struct cpufreq_policy *policy)
 
                sg_cpu->sg_policy = sg_policy;
                if (policy_is_shared(policy)) {
-                       sg_cpu->util = ULONG_MAX;
+                       sg_cpu->util = 0;
                        sg_cpu->max = 0;
+                       sg_cpu->flags = SCHED_CPUFREQ_RT;
                        sg_cpu->last_update = 0;
                        sg_cpu->cached_raw_freq = 0;
+                       sg_cpu->iowait_boost = 0;
+                       sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
                        cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
                                                     sugov_update_shared);
                } else {
@@ -495,28 +562,15 @@ static struct cpufreq_governor schedutil_gov = {
        .limits = sugov_limits,
 };
 
-static int __init sugov_module_init(void)
-{
-       return cpufreq_register_governor(&schedutil_gov);
-}
-
-static void __exit sugov_module_exit(void)
-{
-       cpufreq_unregister_governor(&schedutil_gov);
-}
-
-MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
-MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
-MODULE_LICENSE("GPL");
-
 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
 struct cpufreq_governor *cpufreq_default_governor(void)
 {
        return &schedutil_gov;
 }
-
-fs_initcall(sugov_module_init);
-#else
-module_init(sugov_module_init);
 #endif
-module_exit(sugov_module_exit);
+
+static int __init sugov_register(void)
+{
+       return cpufreq_register_governor(&schedutil_gov);
+}
+fs_initcall(sugov_register);
index 1ce8867..9747796 100644 (file)
@@ -735,9 +735,8 @@ static void update_curr_dl(struct rq *rq)
                return;
        }
 
-       /* kick cpufreq (see the comment in linux/cpufreq.h). */
-       if (cpu_of(rq) == smp_processor_id())
-               cpufreq_trigger_update(rq_clock(rq));
+       /* kick cpufreq (see the comment in kernel/sched/sched.h). */
+       cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
 
        schedstat_set(curr->se.statistics.exec_max,
                      max(curr->se.statistics.exec_max, delta_exec));
index 039de34..a5cd07b 100644 (file)
@@ -2875,12 +2875,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
 
 static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 {
-       struct rq *rq = rq_of(cfs_rq);
-       int cpu = cpu_of(rq);
-
-       if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
-               unsigned long max = rq->cpu_capacity_orig;
-
+       if (&this_rq()->cfs == cfs_rq) {
                /*
                 * There are a few boundary cases this might miss but it should
                 * get called often enough that that should (hopefully) not be
@@ -2897,8 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
                 *
                 * See cpu_util().
                 */
-               cpufreq_update_util(rq_clock(rq),
-                                   min(cfs_rq->avg.util_avg, max), max);
+               cpufreq_update_util(rq_of(cfs_rq), 0);
        }
 }
 
@@ -3159,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 
 static inline void update_load_avg(struct sched_entity *se, int not_used)
 {
-       struct cfs_rq *cfs_rq = cfs_rq_of(se);
-       struct rq *rq = rq_of(cfs_rq);
-
-       cpufreq_trigger_update(rq_clock(rq));
+       cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
 }
 
 static inline void
@@ -4509,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se;
 
+       /*
+        * If in_iowait is set, the code below may not trigger any cpufreq
+        * utilization updates, so do it here explicitly with the IOWAIT flag
+        * passed.
+        */
+       if (p->in_iowait)
+               cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+
        for_each_sched_entity(se) {
                if (se->on_rq)
                        break;
index d5690b7..2516b8d 100644 (file)
@@ -957,9 +957,8 @@ static void update_curr_rt(struct rq *rq)
        if (unlikely((s64)delta_exec <= 0))
                return;
 
-       /* Kick cpufreq (see the comment in linux/cpufreq.h). */
-       if (cpu_of(rq) == smp_processor_id())
-               cpufreq_trigger_update(rq_clock(rq));
+       /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+       cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
 
        schedstat_set(curr->se.statistics.exec_max,
                      max(curr->se.statistics.exec_max, delta_exec));
index c64fc51..b7fc1ce 100644 (file)
@@ -1763,27 +1763,13 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
 
 /**
  * cpufreq_update_util - Take a note about CPU utilization changes.
- * @time: Current time.
- * @util: Current utilization.
- * @max: Utilization ceiling.
+ * @rq: Runqueue to carry out the update for.
+ * @flags: Update reason flags.
  *
- * This function is called by the scheduler on every invocation of
- * update_load_avg() on the CPU whose utilization is being updated.
+ * This function is called by the scheduler on the CPU whose utilization is
+ * being updated.
  *
  * It can only be called from RCU-sched read-side critical sections.
- */
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
-{
-       struct update_util_data *data;
-
-       data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
-       if (data)
-               data->func(data, time, util, max);
-}
-
-/**
- * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
- * @time: Current time.
  *
  * The way cpufreq is currently arranged requires it to evaluate the CPU
  * performance state (frequency/voltage) on a regular basis to prevent it from
@@ -1797,13 +1783,23 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo
  * but that really is a band-aid.  Going forward it should be replaced with
  * solutions targeted more specifically at RT and DL tasks.
  */
-static inline void cpufreq_trigger_update(u64 time)
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
+{
+       struct update_util_data *data;
+
+       data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+       if (data)
+               data->func(data, rq_clock(rq), flags);
+}
+
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
 {
-       cpufreq_update_util(time, ULONG_MAX, 0);
+       if (cpu_of(rq) == smp_processor_id())
+               cpufreq_update_util(rq, flags);
 }
 #else
-static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
-static inline void cpufreq_trigger_update(u64 time) {}
+static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
+static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef arch_scale_freq_capacity