Merge cpufreq fixes going into v4.6.
[cascardo/linux.git] / drivers / cpufreq / intel_pstate.c
index b230eba..3addb22 100644 (file)
@@ -10,6 +10,8 @@
  * of the License.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/kernel.h>
 #include <linux/kernel_stat.h>
 #include <linux/module.h>
 #define ATOM_TURBO_RATIOS      0x66c
 #define ATOM_TURBO_VIDS                0x66d
 
+#ifdef CONFIG_ACPI
+#include <acpi/processor.h>
+#endif
+
 #define FRAC_BITS 8
 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
 #define fp_toint(X) ((X) >> FRAC_BITS)
@@ -172,6 +178,8 @@ struct _pid {
  * @prev_cummulative_iowait: IO Wait time difference from last and
  *                     current sample
  * @sample:            Storage for storing last Sample data
+ * @acpi_perf_data:    Stores ACPI perf information read from _PSS
+ * @valid_pss_table:   Set to true for valid ACPI _PSS entries found
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -190,6 +198,10 @@ struct cpudata {
        u64     prev_tsc;
        u64     prev_cummulative_iowait;
        struct sample sample;
+#ifdef CONFIG_ACPI
+       struct acpi_processor_performance acpi_perf_data;
+       bool valid_pss_table;
+#endif
 };
 
 static struct cpudata **all_cpu_data;
@@ -258,6 +270,9 @@ static struct pstate_adjust_policy pid_params;
 static struct pstate_funcs pstate_funcs;
 static int hwp_active;
 
+#ifdef CONFIG_ACPI
+static bool acpi_ppc;
+#endif
 
 /**
  * struct perf_limits - Store user and policy limits
@@ -331,6 +346,124 @@ static struct perf_limits *limits = &performance_limits;
 static struct perf_limits *limits = &powersave_limits;
 #endif
 
+#ifdef CONFIG_ACPI
+
+static bool intel_pstate_get_ppc_enable_status(void)
+{
+       if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
+           acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER)
+               return true;
+
+       return acpi_ppc;
+}
+
+/*
+ * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
+ * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
+ * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
+ * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
+ * target ratio 0x17. The _PSS control value stores in a format which can be
+ * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
+ * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
+ * This function converts the _PSS control value to intel pstate driver format
+ * for comparison and assignment.
+ */
+static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
+{
+       return cpu->acpi_perf_data.states[index].control >> 8;
+}
+
+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+       struct cpudata *cpu;
+       int turbo_pss_ctl;
+       int ret;
+       int i;
+
+       if (hwp_active)
+               return;
+
+       if (!intel_pstate_get_ppc_enable_status())
+               return;
+
+       cpu = all_cpu_data[policy->cpu];
+
+       ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
+                                                 policy->cpu);
+       if (ret)
+               return;
+
+       /*
+        * Check if the control value in _PSS is for PERF_CTL MSR, which should
+        * guarantee that the states returned by it map to the states in our
+        * list directly.
+        */
+       if (cpu->acpi_perf_data.control_register.space_id !=
+                                               ACPI_ADR_SPACE_FIXED_HARDWARE)
+               goto err;
+
+       /*
+        * If there is only one entry _PSS, simply ignore _PSS and continue as
+        * usual without taking _PSS into account
+        */
+       if (cpu->acpi_perf_data.state_count < 2)
+               goto err;
+
+       pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu);
+       for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
+               pr_debug("     %cP%d: %u MHz, %u mW, 0x%x\n",
+                        (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
+                        (u32) cpu->acpi_perf_data.states[i].core_frequency,
+                        (u32) cpu->acpi_perf_data.states[i].power,
+                        (u32) cpu->acpi_perf_data.states[i].control);
+       }
+
+       /*
+        * The _PSS table doesn't contain whole turbo frequency range.
+        * This just contains +1 MHZ above the max non turbo frequency,
+        * with control value corresponding to max turbo ratio. But
+        * when cpufreq set policy is called, it will call with this
+        * max frequency, which will cause a reduced performance as
+        * this driver uses real max turbo frequency as the max
+        * frequency. So correct this frequency in _PSS table to
+        * correct max turbo frequency based on the turbo ratio.
+        * Also need to convert to MHz as _PSS freq is in MHz.
+        */
+       turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
+       if (turbo_pss_ctl > cpu->pstate.max_pstate)
+               cpu->acpi_perf_data.states[0].core_frequency =
+                                       policy->cpuinfo.max_freq / 1000;
+       cpu->valid_pss_table = true;
+       pr_info("_PPC limits will be enforced\n");
+
+       return;
+
+ err:
+       cpu->valid_pss_table = false;
+       acpi_processor_unregister_performance(policy->cpu);
+}
+
+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+       struct cpudata *cpu;
+
+       cpu = all_cpu_data[policy->cpu];
+       if (!cpu->valid_pss_table)
+               return;
+
+       acpi_processor_unregister_performance(policy->cpu);
+}
+
+#else
+static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
+{
+}
+
+static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+{
+}
+#endif
+
 static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
                             int deadband, int integral) {
        pid->setpoint = int_tofp(setpoint);
@@ -341,17 +474,17 @@ static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
 
 static inline void pid_p_gain_set(struct _pid *pid, int percent)
 {
-       pid->p_gain = div_fp(int_tofp(percent), int_tofp(100));
+       pid->p_gain = div_fp(percent, 100);
 }
 
 static inline void pid_i_gain_set(struct _pid *pid, int percent)
 {
-       pid->i_gain = div_fp(int_tofp(percent), int_tofp(100));
+       pid->i_gain = div_fp(percent, 100);
 }
 
 static inline void pid_d_gain_set(struct _pid *pid, int percent)
 {
-       pid->d_gain = div_fp(int_tofp(percent), int_tofp(100));
+       pid->d_gain = div_fp(percent, 100);
 }
 
 static signed int pid_calc(struct _pid *pid, int32_t busy)
@@ -537,7 +670,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj,
 
        total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1;
        no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1;
-       turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total));
+       turbo_fp = div_fp(no_turbo, total);
        turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100)));
        return sprintf(buf, "%u\n", turbo_pct);
 }
@@ -579,7 +712,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 
        update_turbo_state();
        if (limits->turbo_disabled) {
-               pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n");
+               pr_warn("Turbo disabled by BIOS or unavailable on processor\n");
                return -EPERM;
        }
 
@@ -608,8 +741,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
                                   limits->max_perf_pct);
        limits->max_perf_pct = max(limits->min_perf_pct,
                                   limits->max_perf_pct);
-       limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
-                                 int_tofp(100));
+       limits->max_perf = div_fp(limits->max_perf_pct, 100);
 
        if (hwp_active)
                intel_pstate_hwp_set_online_cpus();
@@ -633,8 +765,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
                                   limits->min_perf_pct);
        limits->min_perf_pct = min(limits->max_perf_pct,
                                   limits->min_perf_pct);
-       limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
-                                 int_tofp(100));
+       limits->min_perf = div_fp(limits->min_perf_pct, 100);
 
        if (hwp_active)
                intel_pstate_hwp_set_online_cpus();
@@ -1024,8 +1155,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu)
        struct sample *sample = &cpu->sample;
        int64_t core_pct;
 
-       core_pct = int_tofp(sample->aperf) * int_tofp(100);
-       core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
+       core_pct = sample->aperf * int_tofp(100);
+       core_pct = div64_u64(core_pct, sample->mperf);
 
        sample->core_pct_busy = (int32_t)core_pct;
 }
@@ -1075,6 +1206,12 @@ static inline int32_t get_avg_frequency(struct cpudata *cpu)
                                                cpu->pstate.scaling / 100)));
 }
 
+static inline int32_t get_avg_pstate(struct cpudata *cpu)
+{
+       return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf,
+                        cpu->sample.mperf);
+}
+
 static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
 {
        struct sample *sample = &cpu->sample;
@@ -1107,7 +1244,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
        cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
        cpu->sample.busy_scaled = cpu_load;
 
-       return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
+       return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
 }
 
 static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
@@ -1127,8 +1264,8 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
         * specified pstate.
         */
        core_busy = cpu->sample.core_pct_busy;
-       max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
-       current_pstate = int_tofp(cpu->pstate.current_pstate);
+       max_pstate = cpu->pstate.max_pstate_physical;
+       current_pstate = cpu->pstate.current_pstate;
        core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
        /*
@@ -1139,8 +1276,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
         */
        duration_ns = cpu->sample.time - cpu->last_sample_time;
        if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
-               sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
-                                     int_tofp(duration_ns));
+               sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns);
                core_busy = mul_fp(core_busy, sample_ratio);
        } else {
                sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc);
@@ -1261,9 +1397,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 
        intel_pstate_busy_pid_reset(cpu);
 
-       cpu->update_util.func = intel_pstate_update_util;
-
-       pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
+       pr_debug("controlling: cpu %d\n", cpunum);
 
        return 0;
 }
@@ -1286,12 +1420,13 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 
        /* Prevent intel_pstate_update_util() from using stale data. */
        cpu->sample.time = 0;
-       cpufreq_set_update_util_data(cpu_num, &cpu->update_util);
+       cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
+                                    intel_pstate_update_util);
 }
 
 static void intel_pstate_clear_update_util_hook(unsigned int cpu)
 {
-       cpufreq_set_update_util_data(cpu, NULL);
+       cpufreq_remove_update_util_hook(cpu);
        synchronize_sched();
 }
 
@@ -1311,20 +1446,31 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits)
 
 static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 {
+       struct cpudata *cpu;
+
        if (!policy->cpuinfo.max_freq)
                return -ENODEV;
 
        intel_pstate_clear_update_util_hook(policy->cpu);
 
+       cpu = all_cpu_data[0];
+       if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate) {
+               if (policy->max < policy->cpuinfo.max_freq &&
+                   policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) {
+                       pr_debug("policy->max > max non turbo frequency\n");
+                       policy->max = policy->cpuinfo.max_freq;
+               }
+       }
+
        if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
                limits = &performance_limits;
                if (policy->max >= policy->cpuinfo.max_freq) {
-                       pr_debug("intel_pstate: set performance\n");
+                       pr_debug("set performance\n");
                        intel_pstate_set_performance_limits(limits);
                        goto out;
                }
        } else {
-               pr_debug("intel_pstate: set powersave\n");
+               pr_debug("set powersave\n");
                limits = &powersave_limits;
        }
 
@@ -1348,10 +1494,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
        /* Make sure min_perf_pct <= max_perf_pct */
        limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
 
-       limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
-                                 int_tofp(100));
-       limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
-                                 int_tofp(100));
+       limits->min_perf = div_fp(limits->min_perf_pct, 100);
+       limits->max_perf = div_fp(limits->max_perf_pct, 100);
 
  out:
        intel_pstate_set_update_util_hook(policy->cpu);
@@ -1377,7 +1521,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
        int cpu_num = policy->cpu;
        struct cpudata *cpu = all_cpu_data[cpu_num];
 
-       pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
+       pr_debug("CPU %d exiting\n", cpu_num);
 
        intel_pstate_clear_update_util_hook(cpu_num);
 
@@ -1410,12 +1554,20 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
        policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
        policy->cpuinfo.max_freq =
                cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+       intel_pstate_init_acpi_perf_limits(policy);
        policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
        cpumask_set_cpu(policy->cpu, policy->cpus);
 
        return 0;
 }
 
+static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+{
+       intel_pstate_exit_perf_limits(policy);
+
+       return 0;
+}
+
 static struct cpufreq_driver intel_pstate_driver = {
        .flags          = CPUFREQ_CONST_LOOPS,
        .verify         = intel_pstate_verify_policy,
@@ -1423,6 +1575,7 @@ static struct cpufreq_driver intel_pstate_driver = {
        .resume         = intel_pstate_hwp_set_policy,
        .get            = intel_pstate_get,
        .init           = intel_pstate_cpu_init,
+       .exit           = intel_pstate_cpu_exit,
        .stop_cpu       = intel_pstate_stop_cpu,
        .name           = "intel_pstate",
 };
@@ -1466,8 +1619,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs)
 
 }
 
-#if IS_ENABLED(CONFIG_ACPI)
-#include <acpi/processor.h>
+#ifdef CONFIG_ACPI
 
 static bool intel_pstate_no_acpi_pss(void)
 {
@@ -1623,7 +1775,7 @@ hwp_cpu_matched:
        if (intel_pstate_platform_pwr_mgmt_exists())
                return -ENODEV;
 
-       pr_info("Intel P-state driver initializing.\n");
+       pr_info("Intel P-state driver initializing\n");
 
        all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
        if (!all_cpu_data)
@@ -1640,7 +1792,7 @@ hwp_cpu_matched:
        intel_pstate_sysfs_expose_params();
 
        if (hwp_active)
-               pr_info("intel_pstate: HWP enabled\n");
+               pr_info("HWP enabled\n");
 
        return rc;
 out:
@@ -1666,13 +1818,19 @@ static int __init intel_pstate_setup(char *str)
        if (!strcmp(str, "disable"))
                no_load = 1;
        if (!strcmp(str, "no_hwp")) {
-               pr_info("intel_pstate: HWP disabled\n");
+               pr_info("HWP disabled\n");
                no_hwp = 1;
        }
        if (!strcmp(str, "force"))
                force_load = 1;
        if (!strcmp(str, "hwp_only"))
                hwp_only = 1;
+
+#ifdef CONFIG_ACPI
+       if (!strcmp(str, "support_acpi_ppc"))
+               acpi_ppc = true;
+#endif
+
        return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);