drivers/thermal/intel_powerclamp.c
/*
 * intel_powerclamp.c - package c-state idle injection
 *
 * Copyright (c) 2012, Intel Corporation.
 *
 * Authors:
 *     Arjan van de Ven <arjan@linux.intel.com>
 *     Jacob Pan <jacob.jun.pan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 *
 *      TODO:
 *           1. better handle wakeup from external interrupts, currently a fixed
 *              compensation is added to the clamping duration when an excessive
 *              number of wakeups is observed during idle time. the reason is
 *              that in case of external interrupts without need for ack,
 *              clamping down the cpu in non-irq context does not reduce irqs.
 *              for the majority of cases, clamping down the cpu does help
 *              reduce irqs as well, we should be able to differentiate the two
 *              cases and give a quantitative solution for the irqs that we can
 *              control. perhaps based on get_cpu_iowait_time_us()
 *
 *           2. synchronization with other hw blocks
 *
 *
 */

#define pr_fmt(fmt)     KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/sched/rt.h>

#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>
#include <asm/idle.h>
#include <asm/hardirq.h>

#define MAX_TARGET_RATIO (50U)
/* For each undisturbed clamping period (no extra wake ups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level where runtime calibration results are
 * valid.
 */
#define CONFIDENCE_OK (3)
/* Default idle injection duration, the driver adjusts the sleep time to meet
 * the target idle ratio. Similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)

static unsigned int target_mwait;
static struct dentry *debug_dir;

/* user selected target */
static unsigned int set_target_ratio;
static unsigned int current_ratio;
static bool should_skip;
static bool reduce_irq;
static atomic_t idle_wakeup_counter;
static unsigned int control_cpu; /* The cpu assigned to collect stats and
                                  * update control parameters. Defaults to
                                  * the BSP, but the BSP can be offlined.
                                  */
static bool clamping;


static struct task_struct * __percpu *powerclamp_thread;
static struct thermal_cooling_device *cooling_dev;
static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
                                           * clamping thread
                                           */

static unsigned int duration;
static unsigned int pkg_cstate_ratio_cur;
static unsigned int window_size;

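/* module parameter handler: keep the forced idle duration (in ms) within the
 * recommended 6-25ms range.
 */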
static int duration_set(const char *arg, const struct kernel_param *kp)
{
        int ret = 0;
        unsigned long new_duration;

        ret = kstrtoul(arg, 10, &new_duration);
        if (ret)
                goto exit;
        if (new_duration > 25 || new_duration < 6) {
                pr_err("Out of recommended range %lu, between 6-25ms\n",
                        new_duration);
                ret = -EINVAL;
        }

        duration = clamp(new_duration, 6ul, 25ul);
        smp_mb();

exit:

        return ret;
}

static struct kernel_param_ops duration_ops = {
        .set = duration_set,
        .get = param_get_int,
};


module_param_cb(duration, &duration_ops, &duration, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");

struct powerclamp_calibration_data {
        unsigned long confidence;  /* used for calibration, basically a counter
                                    * that gets incremented each time a clamping
                                    * period is completed without extra wakeups.
                                    * once the counter reaches a given level,
                                    * the compensation is deemed usable.
                                    */
        unsigned long steady_comp; /* steady state compensation used when
                                    * no extra wakeups occurred.
                                    */
        unsigned long dynamic_comp; /* compensate excessive wakeup from idle
                                     * mostly from external interrupts.
                                     */
};

static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];

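/* module parameter handler: keep the sliding window size within the
 * recommended 2-10 clamping cycles.
 */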
static int window_size_set(const char *arg, const struct kernel_param *kp)
{
        int ret = 0;
        unsigned long new_window_size;

        ret = kstrtoul(arg, 10, &new_window_size);
        if (ret)
                goto exit_win;
        if (new_window_size > 10 || new_window_size < 2) {
                pr_err("Out of recommended window size %lu, between 2-10\n",
                        new_window_size);
                ret = -EINVAL;
        }

        window_size = clamp(new_window_size, 2ul, 10ul);
        smp_mb();

exit_win:

        return ret;
}

static struct kernel_param_ops window_size_ops = {
        .set = window_size_set,
        .get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
        "\tpowerclamp controls idle ratio within this window. larger\n"
        "\twindow size results in slower response time but more smooth\n"
        "\tclamping results. default to 2.");

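/* Find the deepest mwait hint supported by the CPU, based on the sub-states
 * reported by CPUID leaf 5. The result is stored in target_mwait and used as
 * the hint for idle injection.
 */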
static void find_target_mwait(void)
{
        unsigned int eax, ebx, ecx, edx;
        unsigned int highest_cstate = 0;
        unsigned int highest_subcstate = 0;
        int i;

        if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
                return;

        cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

        if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
            !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
                return;

        edx >>= MWAIT_SUBSTATE_SIZE;
        for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
                if (edx & MWAIT_SUBSTATE_MASK) {
                        highest_cstate = i;
                        highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
                }
        }
        target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
                (highest_subcstate - 1);

}

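/* Return true if at least one package C-state residency MSR (C2/C3/C6/C7)
 * can be read on this system.
 */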
static bool has_pkg_state_counter(void)
{
        u64 tmp;
        return !rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &tmp) ||
               !rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &tmp) ||
               !rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &tmp) ||
               !rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &tmp);
}

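/* Sum the readable package C-state residency counters. MSRs that fault on
 * the first read are skipped on all subsequent calls.
 */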
static u64 pkg_state_counter(void)
{
        u64 val;
        u64 count = 0;

        static bool skip_c2;
        static bool skip_c3;
        static bool skip_c6;
        static bool skip_c7;

        if (!skip_c2) {
                if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val))
                        count += val;
                else
                        skip_c2 = true;
        }

        if (!skip_c3) {
                if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val))
                        count += val;
                else
                        skip_c3 = true;
        }

        if (!skip_c6) {
                if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val))
                        count += val;
                else
                        skip_c6 = true;
        }

        if (!skip_c7) {
                if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val))
                        count += val;
                else
                        skip_c7 = true;
        }

        return count;
}

static void noop_timer(unsigned long foo)
{
        /* empty... just the fact that we get the interrupt wakes us up */
}

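/* Return the extra idle ratio to inject on top of the target ratio, based on
 * calibration data for the target ratio and its neighbors.
 */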
static unsigned int get_compensation(int ratio)
{
        unsigned int comp = 0;

        /* we only use compensation if all adjacent ones are good */
        if (ratio == 1 &&
                cal_data[ratio].confidence >= CONFIDENCE_OK &&
                cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
                cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
                comp = (cal_data[ratio].steady_comp +
                        cal_data[ratio + 1].steady_comp +
                        cal_data[ratio + 2].steady_comp) / 3;
        } else if (ratio == MAX_TARGET_RATIO - 1 &&
                cal_data[ratio].confidence >= CONFIDENCE_OK &&
                cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
                cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
                comp = (cal_data[ratio].steady_comp +
                        cal_data[ratio - 1].steady_comp +
                        cal_data[ratio - 2].steady_comp) / 3;
        } else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
                cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
                cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
                comp = (cal_data[ratio].steady_comp +
                        cal_data[ratio - 1].steady_comp +
                        cal_data[ratio + 1].steady_comp) / 3;
        }

        /* REVISIT: simple penalty of double idle injection */
        if (reduce_irq)
                comp = ratio;
        /* do not exceed limit */
        if (comp + ratio >= MAX_TARGET_RATIO)
                comp = MAX_TARGET_RATIO - ratio - 1;

        return comp;
}

static void adjust_compensation(int target_ratio, unsigned int win)
{
        int delta;
        struct powerclamp_calibration_data *d = &cal_data[target_ratio];

        /*
         * skip adjusting compensation if the confidence level has already
         * been reached, or if there were too many wakeups during the last
         * idle injection period; in that case we cannot trust the data.
         */
        if (d->confidence >= CONFIDENCE_OK ||
                atomic_read(&idle_wakeup_counter) >
                win * num_online_cpus())
                return;

        delta = set_target_ratio - current_ratio;
        /* filter out bad data */
        if (delta >= 0 && delta <= (1+target_ratio/10)) {
                if (d->steady_comp)
                        d->steady_comp =
                                roundup(delta+d->steady_comp, 2)/2;
                else
                        d->steady_comp = delta;
                d->confidence++;
        }
}

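/* Compare the package C-state residency gained over the last window against
 * the TSC, update the calibration data, and return true if the target (plus
 * guard band) has already been met so the next injection can be skipped.
 */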
static bool powerclamp_adjust_controls(unsigned int target_ratio,
                                unsigned int guard, unsigned int win)
{
        static u64 msr_last, tsc_last;
        u64 msr_now, tsc_now;
        u64 val64;

        /* check result for the last window */
        msr_now = pkg_state_counter();
        rdtscll(tsc_now);

        /* calculate pkg cstate vs tsc ratio */
        if (!msr_last || !tsc_last)
                current_ratio = 1;
        else if (tsc_now-tsc_last) {
                val64 = 100*(msr_now-msr_last);
                do_div(val64, (tsc_now-tsc_last));
                current_ratio = val64;
        }

        /* update record */
        msr_last = msr_now;
        tsc_last = tsc_now;

        adjust_compensation(target_ratio, win);
        /*
         * too many external interrupts, set the flag so
         * that we can take measures later.
         */
        reduce_irq = atomic_read(&idle_wakeup_counter) >=
                2 * win * num_online_cpus();

        atomic_set(&idle_wakeup_counter, 0);
        /* if we are above target+guard, skip */
        return set_target_ratio + guard <= current_ratio;
}

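/* Per-cpu kthread that performs the idle injection: align with the clamping
 * interval, then mwait with the tick stopped until the injection duration
 * expires. The controlling cpu also updates the control parameters once per
 * window.
 */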
static int clamp_thread(void *arg)
{
        int cpunr = (unsigned long)arg;
        DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0);
        static const struct sched_param param = {
                .sched_priority = MAX_USER_RT_PRIO/2,
        };
        unsigned int count = 0;
        unsigned int target_ratio;

        set_bit(cpunr, cpu_clamping_mask);
        set_freezable();
        init_timer_on_stack(&wakeup_timer);
        sched_setscheduler(current, SCHED_FIFO, &param);

        while (true == clamping && !kthread_should_stop() &&
                cpu_online(cpunr)) {
                int sleeptime;
                unsigned long target_jiffies;
                unsigned int guard;
                unsigned int compensation = 0;
                int interval; /* jiffies to sleep for each attempt */
                unsigned int duration_jiffies = msecs_to_jiffies(duration);
                unsigned int window_size_now;

                try_to_freeze();
                /*
                 * make sure the user-selected ratio does not take effect
                 * until the next round. adjust target_ratio if the user has
                 * changed the target, so that we can converge quickly.
                 */
                target_ratio = set_target_ratio;
                guard = 1 + target_ratio/20;
                window_size_now = window_size;
                count++;

                /*
                 * systems may have different ability to enter package level
                 * c-states, thus we need to compensate the injected idle ratio
                 * to achieve the actual target reported by the HW.
                 */
                compensation = get_compensation(target_ratio);
                interval = duration_jiffies*100/(target_ratio+compensation);

                /* align idle time */
                target_jiffies = roundup(jiffies, interval);
                sleeptime = target_jiffies - jiffies;
                if (sleeptime <= 0)
                        sleeptime = 1;
                schedule_timeout_interruptible(sleeptime);
                /*
                 * only the elected controlling cpu can collect stats and
                 * update control parameters.
                 */
                if (cpunr == control_cpu && !(count%window_size_now)) {
                        should_skip =
                                powerclamp_adjust_controls(target_ratio,
                                                        guard, window_size_now);
                        smp_mb();
                }

                if (should_skip)
                        continue;

                target_jiffies = jiffies + duration_jiffies;
                mod_timer(&wakeup_timer, target_jiffies);
                if (unlikely(local_softirq_pending()))
                        continue;
                /*
                 * stop the tick sched during idle time, interrupts are still
                 * allowed. thus jiffies are updated properly.
                 */
                preempt_disable();
                tick_nohz_idle_enter();
                /* mwait until target jiffies is reached */
                while (time_before(jiffies, target_jiffies)) {
                        unsigned long ecx = 1;
                        unsigned long eax = target_mwait;

                        /*
                         * REVISIT: may call enter_idle() to notify drivers who
                         * can save power during cpu idle. same for exit_idle()
                         */
                        local_touch_nmi();
                        stop_critical_timings();
                        mwait_idle_with_hints(eax, ecx);
                        start_critical_timings();
                        atomic_inc(&idle_wakeup_counter);
                }
                tick_nohz_idle_exit();
                preempt_enable();
        }
        del_timer_sync(&wakeup_timer);
        clear_bit(cpunr, cpu_clamping_mask);

        return 0;
}

/*
 * 1 HZ polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
        static u64 msr_last;
        static u64 tsc_last;
        static unsigned long jiffies_last;

        u64 msr_now;
        unsigned long jiffies_now;
        u64 tsc_now;
        u64 val64;

        msr_now = pkg_state_counter();
        rdtscll(tsc_now);
        jiffies_now = jiffies;

        /* calculate pkg cstate vs tsc ratio */
        if (!msr_last || !tsc_last)
                pkg_cstate_ratio_cur = 1;
        else {
                if (tsc_now - tsc_last) {
                        val64 = 100 * (msr_now - msr_last);
                        do_div(val64, (tsc_now - tsc_last));
                        pkg_cstate_ratio_cur = val64;
                }
        }

        /* update record */
        msr_last = msr_now;
        jiffies_last = jiffies_now;
        tsc_last = tsc_now;

        if (true == clamping)
                schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}

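/* Start clamping: verify that package C-state counters work, elect a
 * controlling cpu, kick off the 1 HZ status polling and spawn one injection
 * kthread bound to each online cpu.
 */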
static int start_power_clamp(void)
{
        unsigned long cpu;
        struct task_struct *thread;

        /* check if pkg cstate counter is completely 0, abort in this case */
        if (!has_pkg_state_counter()) {
                pr_err("pkg cstate counter not functional, abort\n");
                return -EINVAL;
        }

        set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
        /* prevent cpu hotplug */
        get_online_cpus();

        /* prefer BSP */
        control_cpu = 0;
        if (!cpu_online(control_cpu))
                control_cpu = smp_processor_id();

        clamping = true;
        schedule_delayed_work(&poll_pkg_cstate_work, 0);

        /* start one thread per online cpu */
        for_each_online_cpu(cpu) {
                struct task_struct **p =
                        per_cpu_ptr(powerclamp_thread, cpu);

                thread = kthread_create_on_node(clamp_thread,
                                                (void *) cpu,
                                                cpu_to_node(cpu),
                                                "kidle_inject/%ld", cpu);
                /* bind to cpu here */
                if (likely(!IS_ERR(thread))) {
                        kthread_bind(thread, cpu);
                        wake_up_process(thread);
                        *p = thread;
                }

        }
        put_online_cpus();

        return 0;
}

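/* Stop clamping and stop any per-cpu injection threads that are still
 * running.
 */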
static void end_power_clamp(void)
{
        int i;
        struct task_struct *thread;

        clamping = false;
        /*
         * make clamping visible to other cpus and give per cpu clamping
         * threads some time to exit, or they will get killed later.
         */
        smp_mb();
        msleep(20);
        if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
                for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
                        pr_debug("clamping thread for cpu %d alive, kill\n", i);
                        thread = *per_cpu_ptr(powerclamp_thread, i);
                        kthread_stop(thread);
                }
        }
}

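/* cpu hotplug callback: start or stop the injection thread for the cpu going
 * online or offline, and keep control_cpu valid while clamping.
 */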
static int powerclamp_cpu_callback(struct notifier_block *nfb,
                                unsigned long action, void *hcpu)
{
        unsigned long cpu = (unsigned long)hcpu;
        struct task_struct *thread;
        struct task_struct **percpu_thread =
                per_cpu_ptr(powerclamp_thread, cpu);

        if (false == clamping)
                goto exit_ok;

        switch (action) {
        case CPU_ONLINE:
                thread = kthread_create_on_node(clamp_thread,
                                                (void *) cpu,
                                                cpu_to_node(cpu),
                                                "kidle_inject/%lu", cpu);
                if (likely(!IS_ERR(thread))) {
                        kthread_bind(thread, cpu);
                        wake_up_process(thread);
                        *percpu_thread = thread;
                }
                /* prefer BSP as controlling CPU */
                if (cpu == 0) {
                        control_cpu = 0;
                        smp_mb();
                }
                break;
        case CPU_DEAD:
                if (test_bit(cpu, cpu_clamping_mask)) {
                        pr_err("cpu %lu dead but powerclamping thread is not\n",
                                cpu);
                        kthread_stop(*percpu_thread);
                }
                if (cpu == control_cpu) {
                        control_cpu = smp_processor_id();
                        smp_mb();
                }
        }

exit_ok:
        return NOTIFY_OK;
}

static struct notifier_block powerclamp_cpu_notifier = {
        .notifier_call = powerclamp_cpu_callback,
};

static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
                                 unsigned long *state)
{
        *state = MAX_TARGET_RATIO;

        return 0;
}

static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
                                 unsigned long *state)
{
        if (true == clamping)
                *state = pkg_cstate_ratio_cur;
        else
                /* to save power, do not poll idle ratio while not clamping */
                *state = -1; /* indicates invalid state */

        return 0;
}

static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
                                 unsigned long new_target_ratio)
{
        int ret = 0;

        new_target_ratio = clamp(new_target_ratio, 0UL,
                                (unsigned long) (MAX_TARGET_RATIO-1));
        if (set_target_ratio == 0 && new_target_ratio > 0) {
                pr_info("Start idle injection to reduce power\n");
                set_target_ratio = new_target_ratio;
                ret = start_power_clamp();
                goto exit_set;
        } else  if (set_target_ratio > 0 && new_target_ratio == 0) {
                pr_info("Stop forced idle injection\n");
                set_target_ratio = 0;
                end_power_clamp();
        } else  /* adjust currently running */ {
                set_target_ratio = new_target_ratio;
                /* make new set_target_ratio visible to other cpus */
                smp_mb();
        }

exit_set:
        return ret;
}

/* bind to generic thermal layer as cooling device */
static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
        .get_max_state = powerclamp_get_max_state,
        .get_cur_state = powerclamp_get_cur_state,
        .set_cur_state = powerclamp_set_cur_state,
};

/* runs on Nehalem and later */
static const struct x86_cpu_id intel_powerclamp_ids[] = {
        { X86_VENDOR_INTEL, 6, 0x1a},
        { X86_VENDOR_INTEL, 6, 0x1c},
        { X86_VENDOR_INTEL, 6, 0x1e},
        { X86_VENDOR_INTEL, 6, 0x1f},
        { X86_VENDOR_INTEL, 6, 0x25},
        { X86_VENDOR_INTEL, 6, 0x26},
        { X86_VENDOR_INTEL, 6, 0x2a},
        { X86_VENDOR_INTEL, 6, 0x2c},
        { X86_VENDOR_INTEL, 6, 0x2d},
        { X86_VENDOR_INTEL, 6, 0x2e},
        { X86_VENDOR_INTEL, 6, 0x2f},
        { X86_VENDOR_INTEL, 6, 0x37},
        { X86_VENDOR_INTEL, 6, 0x3a},
        { X86_VENDOR_INTEL, 6, 0x3c},
        { X86_VENDOR_INTEL, 6, 0x3d},
        { X86_VENDOR_INTEL, 6, 0x3e},
        { X86_VENDOR_INTEL, 6, 0x3f},
        { X86_VENDOR_INTEL, 6, 0x45},
        { X86_VENDOR_INTEL, 6, 0x46},
        {}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

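/* Check that this is a supported CPU model with a non-stop, constant TSC,
 * MWAIT and an always-running APIC timer (ARAT), then pick the mwait hint
 * to use for idle injection.
 */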
static int powerclamp_probe(void)
{
        if (!x86_match_cpu(intel_powerclamp_ids)) {
                pr_err("Intel powerclamp does not run on family %d model %d\n",
                                boot_cpu_data.x86, boot_cpu_data.x86_model);
                return -ENODEV;
        }
        if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ||
                !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ||
                !boot_cpu_has(X86_FEATURE_MWAIT) ||
                !boot_cpu_has(X86_FEATURE_ARAT))
                return -ENODEV;

        /* find the deepest mwait value */
        find_target_mwait();

        return 0;
}

static int powerclamp_debug_show(struct seq_file *m, void *unused)
{
        int i = 0;

        seq_printf(m, "controlling cpu: %d\n", control_cpu);
        seq_printf(m, "pct confidence steady dynamic (compensation)\n");
        for (i = 0; i < MAX_TARGET_RATIO; i++) {
                seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
                        i,
                        cal_data[i].confidence,
                        cal_data[i].steady_comp,
                        cal_data[i].dynamic_comp);
        }

        return 0;
}

static int powerclamp_debug_open(struct inode *inode,
                        struct file *file)
{
        return single_open(file, powerclamp_debug_show, inode->i_private);
}

static const struct file_operations powerclamp_debug_fops = {
        .open           = powerclamp_debug_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
        .owner          = THIS_MODULE,
};

static inline void powerclamp_create_debug_files(void)
{
        debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
        if (!debug_dir)
                return;

        if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir,
                                        cal_data, &powerclamp_debug_fops))
                goto file_error;

        return;

file_error:
        debugfs_remove_recursive(debug_dir);
}

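/* Module init: allocate per-cpu bookkeeping, probe the CPU, register the cpu
 * hotplug notifier and the "intel_powerclamp" cooling device, and create the
 * debugfs files.
 */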
static int powerclamp_init(void)
{
        int retval;
        int bitmap_size;

        bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
        cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
        if (!cpu_clamping_mask)
                return -ENOMEM;

        /* probe cpu features and ids here */
        retval = powerclamp_probe();
        if (retval)
                goto exit_free;

        /* set default limit, maybe adjusted during runtime based on feedback */
        window_size = 2;
        register_hotcpu_notifier(&powerclamp_cpu_notifier);

        powerclamp_thread = alloc_percpu(struct task_struct *);
        if (!powerclamp_thread) {
                retval = -ENOMEM;
                goto exit_unregister;
        }

        cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
                                                &powerclamp_cooling_ops);
        if (IS_ERR(cooling_dev)) {
                retval = -ENODEV;
                goto exit_free_thread;
        }

        if (!duration)
                duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);

        powerclamp_create_debug_files();

        return 0;

exit_free_thread:
        free_percpu(powerclamp_thread);
exit_unregister:
        unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
exit_free:
        kfree(cpu_clamping_mask);
        return retval;
}
module_init(powerclamp_init);

static void powerclamp_exit(void)
{
        unregister_hotcpu_notifier(&powerclamp_cpu_notifier);
        end_power_clamp();
        free_percpu(powerclamp_thread);
        thermal_cooling_device_unregister(cooling_dev);
        kfree(cpu_clamping_mask);

        cancel_delayed_work_sync(&poll_pkg_cstate_work);
        debugfs_remove_recursive(debug_dir);
}
module_exit(powerclamp_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");