X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=drivers%2Fcpufreq%2Fintel_pstate.c;h=4737520ec8230a830d80e81c0dbc9dbaa96d0dc7;hb=2f1d407adab026b34a105ed27b1d4d7e910c4448;hp=9ec033b4f2d9d6693a5d0139da92c5a8e2b22ab1;hpb=fdf1f7ff1bd7f1c6d1d5dc2b29b6b11a4f722276;p=cascardo%2Flinux.git diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 9ec033b4f2d9..4737520ec823 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -179,8 +179,11 @@ struct _pid { /** * struct cpudata - Per CPU instance data storage * @cpu: CPU number for this instance data + * @policy: CPUFreq policy value * @update_util: CPUFreq utility callback information * @update_util_set: CPUFreq utility callback is set + * @iowait_boost: iowait-related boost fraction + * @last_update: Time of the last update. * @pstate: Stores P state limits for this CPU * @vid: Stores VID limits for this CPU * @pid: Stores PID parameters for this CPU @@ -199,6 +202,7 @@ struct _pid { struct cpudata { int cpu; + unsigned int policy; struct update_util_data update_util; bool update_util_set; @@ -206,6 +210,7 @@ struct cpudata { struct vid_data vid; struct _pid pid; + u64 last_update; u64 last_sample_time; u64 prev_aperf; u64 prev_mperf; @@ -216,12 +221,13 @@ struct cpudata { struct acpi_processor_performance acpi_perf_data; bool valid_pss_table; #endif + unsigned int iowait_boost; }; static struct cpudata **all_cpu_data; /** - * struct pid_adjust_policy - Stores static PID configuration data + * struct pstate_adjust_policy - Stores static PID configuration data * @sample_rate_ms: PID calculation sample rate in ms * @sample_rate_ns: Sample rate calculation in ns * @deadband: PID deadband @@ -229,6 +235,7 @@ static struct cpudata **all_cpu_data; * @p_gain_pct: PID proportional gain * @i_gain_pct: PID integral gain * @d_gain_pct: PID derivative gain + * @boost_iowait: Whether or not to use iowait boosting. * * Stores per CPU model static PID configuration data. */ @@ -240,6 +247,7 @@ struct pstate_adjust_policy { int p_gain_pct; int d_gain_pct; int i_gain_pct; + bool boost_iowait; }; /** @@ -556,12 +564,12 @@ static void intel_pstate_hwp_set(const struct cpumask *cpumask) int min, hw_min, max, hw_max, cpu, range, adj_range; u64 value, cap; - rdmsrl(MSR_HWP_CAPABILITIES, cap); - hw_min = HWP_LOWEST_PERF(cap); - hw_max = HWP_HIGHEST_PERF(cap); - range = hw_max - hw_min; - for_each_cpu(cpu, cpumask) { + rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); + hw_min = HWP_LOWEST_PERF(cap); + hw_max = HWP_HIGHEST_PERF(cap); + range = hw_max - hw_min; + rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value); adj_range = limits->min_perf_pct * range / 100; min = hw_min + adj_range; @@ -1029,7 +1037,7 @@ static struct cpu_defaults core_params = { }, }; -static struct cpu_defaults silvermont_params = { +static const struct cpu_defaults silvermont_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1037,6 +1045,7 @@ static struct cpu_defaults silvermont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1050,7 +1059,7 @@ static struct cpu_defaults silvermont_params = { }, }; -static struct cpu_defaults airmont_params = { +static const struct cpu_defaults airmont_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1058,6 +1067,7 @@ static struct cpu_defaults airmont_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = atom_get_max_pstate, @@ -1071,7 +1081,7 @@ static struct cpu_defaults airmont_params = { }, }; -static struct cpu_defaults knl_params = { +static const struct cpu_defaults knl_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1091,7 +1101,7 @@ static struct cpu_defaults knl_params = { }, }; -static struct cpu_defaults bxt_params = { +static const struct cpu_defaults bxt_params = { .pid_policy = { .sample_rate_ms = 10, .deadband = 0, @@ -1099,6 +1109,7 @@ static struct cpu_defaults bxt_params = { .p_gain_pct = 14, .d_gain_pct = 0, .i_gain_pct = 4, + .boost_iowait = true, }, .funcs = { .get_max = core_get_max_pstate, @@ -1133,10 +1144,8 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } -static void intel_pstate_set_min_pstate(struct cpudata *cpu) +static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) { - int pstate = cpu->pstate.min_pstate; - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); cpu->pstate.current_pstate = pstate; /* @@ -1148,6 +1157,20 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu) pstate_funcs.get_val(cpu, pstate)); } +static void intel_pstate_set_min_pstate(struct cpudata *cpu) +{ + intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); +} + +static void intel_pstate_max_within_limits(struct cpudata *cpu) +{ + int min_pstate, max_pstate; + + update_turbo_state(); + intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate); + intel_pstate_set_pstate(cpu, max_pstate); +} + static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); @@ -1222,36 +1245,38 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu) static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; - u64 cummulative_iowait, delta_iowait_us; - u64 delta_iowait_mperf; - u64 mperf, now; - int32_t cpu_load; + int32_t busy_frac, boost; + int target, avg_pstate; - cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now); + busy_frac = div_fp(sample->mperf, sample->tsc); - /* - * Convert iowait time into number of IO cycles spent at max_freq. - * IO is considered as busy only for the cpu_load algorithm. For - * performance this is not needed since we always try to reach the - * maximum P-State, so we are already boosting the IOs. - */ - delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait; - delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling * - cpu->pstate.max_pstate, MSEC_PER_SEC); + boost = cpu->iowait_boost; + cpu->iowait_boost >>= 1; - mperf = cpu->sample.mperf + delta_iowait_mperf; - cpu->prev_cummulative_iowait = cummulative_iowait; + if (busy_frac < boost) + busy_frac = boost; + + sample->busy_scaled = busy_frac * 100; + + target = limits->no_turbo || limits->turbo_disabled ? + cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; + target += target >> 2; + target = mul_fp(target, busy_frac); + if (target < cpu->pstate.min_pstate) + target = cpu->pstate.min_pstate; /* - * The load can be estimated as the ratio of the mperf counter - * running at a constant frequency during active periods - * (C0) and the time stamp counter running at the same frequency - * also during C-states. + * If the average P-state during the previous cycle was higher than the + * current target, add 50% of the difference to the target to reduce + * possible performance oscillations and offset possible performance + * loss related to moving the workload from one CPU to another within + * a package/module. */ - cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); - cpu->sample.busy_scaled = cpu_load; + avg_pstate = get_avg_pstate(cpu); + if (avg_pstate > target) + target += (avg_pstate - target) >> 1; - return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load); + return target; } static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) @@ -1260,10 +1285,11 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) u64 duration_ns; /* - * perf_scaled is the average performance during the last sampling - * period scaled by the ratio of the maximum P-state to the P-state - * requested last time (in percent). That measures the system's - * response to the previous P-state selection. + * perf_scaled is the ratio of the average P-state during the last + * sampling period to the P-state requested last time (in percent). + * + * That measures the system's response to the previous P-state + * selection. */ max_pstate = cpu->pstate.max_pstate_physical; current_pstate = cpu->pstate.current_pstate; @@ -1313,7 +1339,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) from = cpu->pstate.current_pstate; - target_pstate = pstate_funcs.get_target_pstate(cpu); + target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ? + cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu); intel_pstate_update_pstate(cpu, target_pstate); @@ -1325,15 +1352,29 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) sample->mperf, sample->aperf, sample->tsc, - get_avg_frequency(cpu)); + get_avg_frequency(cpu), + fp_toint(cpu->iowait_boost * 100)); } static void intel_pstate_update_util(struct update_util_data *data, u64 time, - unsigned long util, unsigned long max) + unsigned int flags) { struct cpudata *cpu = container_of(data, struct cpudata, update_util); - u64 delta_ns = time - cpu->sample.time; + u64 delta_ns; + + if (pid_params.boost_iowait) { + if (flags & SCHED_CPUFREQ_IOWAIT) { + cpu->iowait_boost = int_tofp(1); + } else if (cpu->iowait_boost) { + /* Clear iowait_boost if the CPU may have been idle. */ + delta_ns = time - cpu->last_update; + if (delta_ns > TICK_NSEC) + cpu->iowait_boost = 0; + } + cpu->last_update = time; + } + delta_ns = time - cpu->sample.time; if ((s64)delta_ns >= pid_params.sample_rate_ns) { bool sample_taken = intel_pstate_sample(cpu, time); @@ -1374,6 +1415,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids); static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] __initconst = { ICPU(INTEL_FAM6_BROADWELL_XEON_D, core_params), + ICPU(INTEL_FAM6_BROADWELL_X, core_params), + ICPU(INTEL_FAM6_SKYLAKE_X, core_params), {} }; @@ -1463,7 +1506,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) pr_debug("set_policy cpuinfo.max %u policy->max %u\n", policy->cpuinfo.max_freq, policy->max); - cpu = all_cpu_data[0]; + cpu = all_cpu_data[policy->cpu]; + cpu->policy = policy->policy; + if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && policy->max < policy->cpuinfo.max_freq && policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { @@ -1471,7 +1516,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) policy->max = policy->cpuinfo.max_freq; } - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { limits = &performance_limits; if (policy->max >= policy->cpuinfo.max_freq) { pr_debug("set performance\n"); @@ -1507,6 +1552,15 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_perf = round_up(limits->max_perf, FRAC_BITS); out: + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { + /* + * NOHZ_FULL CPUs need this as the governor callback may not + * be invoked on them. + */ + intel_pstate_clear_update_util_hook(policy->cpu); + intel_pstate_max_within_limits(cpu); + } + intel_pstate_set_update_util_hook(policy->cpu); intel_pstate_hwp_set_policy(policy);