/*
 *  drivers/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/cpufreq.h>
#include <linux/cpu.h>
#include <linux/jiffies.h>
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/tick.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * dbs is used in this file as a shorthand for demand-based switching.
 * It helps to keep variable names shorter and simpler.
 */

#define DEF_FREQUENCY_DOWN_DIFFERENTIAL         (10)
#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL       (3)
#define MICRO_FREQUENCY_UP_THRESHOLD            (95)
#define MICRO_FREQUENCY_MIN_SAMPLE_RATE         (10000)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

/*
 * The polling frequency of this governor depends on the capability of
 * the processor. The default polling frequency is 1000 times the transition
 * latency of the processor. The governor will work on any processor with
 * transition latency <= 10 ms, using an appropriate sampling rate.
 * For CPUs with transition latency > 10 ms (mostly drivers with
 * CPUFREQ_ETERNAL) this governor will not work.
 * All times here are in us (microseconds).
 */
#define MIN_SAMPLING_RATE_RATIO                 (2)

static unsigned int min_sampling_rate;

#define LATENCY_MULTIPLIER                      (1000)
#define MIN_LATENCY_MULTIPLIER                  (100)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000 * 1000)
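
/*
 * Worked example (illustrative numbers, not from any particular CPU):
 * a processor reporting a transition latency of 10,000 ns gives
 * latency = 10 us, so the default sampling rate becomes
 * 10 us * LATENCY_MULTIPLIER = 10,000 us (10 ms), and userspace can
 * never set sampling_rate below max(min_sampling_rate,
 * 10 us * MIN_LATENCY_MULTIPLIER), i.e. at least 1,000 us here.
 * See the CPUFREQ_GOV_START handling in cpufreq_governor_dbs() below.
 */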

static void do_dbs_timer(struct work_struct *work);
static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                unsigned int event);

#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
static
#endif
struct cpufreq_governor cpufreq_gov_ondemand = {
        .name                   = "ondemand",
        .governor               = cpufreq_governor_dbs,
        .max_transition_latency = TRANSITION_LATENCY_LIMIT,
        .owner                  = THIS_MODULE,
};

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

struct cpu_dbs_info_s {
        cputime64_t prev_cpu_idle;
        cputime64_t prev_cpu_wall;
        cputime64_t prev_cpu_nice;
        struct cpufreq_policy *cur_policy;
        struct delayed_work work;
        struct cpufreq_frequency_table *freq_table;
        unsigned int freq_lo;
        unsigned int freq_lo_jiffies;
        unsigned int freq_hi_jiffies;
        int cpu;
        unsigned int sample_type:1;
        /*
         * per-CPU mutex that serializes governor limit changes with
         * do_dbs_timer invocation. We do not want do_dbs_timer to run
         * when the user is changing the governor or limits.
         */
        struct mutex timer_mutex;
};
static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info);

static unsigned int dbs_enable; /* number of CPUs using this policy */

/*
 * dbs_mutex protects data in dbs_tuners_ins from concurrent changes on
 * different CPUs. It protects dbs_enable in governor start/stop.
 */
static DEFINE_MUTEX(dbs_mutex);

static struct workqueue_struct  *kondemand_wq;

static struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int up_threshold;
        unsigned int down_differential;
        unsigned int ignore_nice;
        unsigned int powersave_bias;
} dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL,
        .ignore_nice = 0,
        .powersave_bias = 0,
};
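
/*
 * These tunables are exposed through the global sysfs group defined below
 * (dbs_attr_group, name "ondemand"), created on cpufreq_global_kobject when
 * the governor is first started.  On typical systems this shows up as
 * /sys/devices/system/cpu/cpufreq/ondemand/ (the exact location can vary
 * with kernel version and configuration).  Example usage:
 *
 *   echo 95    > /sys/devices/system/cpu/cpufreq/ondemand/up_threshold
 *   echo 20000 > /sys/devices/system/cpu/cpufreq/ondemand/sampling_rate
 *
 * up_threshold must stay within [MIN_FREQUENCY_UP_THRESHOLD,
 * MAX_FREQUENCY_UP_THRESHOLD]; sampling_rate is in microseconds and is
 * clamped to min_sampling_rate.
 */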

static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
                                                        cputime64_t *wall)
{
        cputime64_t idle_time;
        cputime64_t cur_wall_time;
        cputime64_t busy_time;

        cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
        busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user,
                        kstat_cpu(cpu).cpustat.system);

        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal);
        busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.nice);

        idle_time = cputime64_sub(cur_wall_time, busy_time);
        if (wall)
                *wall = (cputime64_t)jiffies_to_usecs(cur_wall_time);

        return (cputime64_t)jiffies_to_usecs(idle_time);
}
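
/*
 * Note on the jiffy-based fallback above: idle time is derived as wall
 * time minus everything accounted as busy (user, system, irq, softirq,
 * steal and nice).  Nice time is therefore treated as busy here; when
 * ignore_nice is set, dbs_check_cpu() adds the nice delta back onto idle
 * time so that niced load does not trigger a frequency increase.
 */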

static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall)
{
        u64 idle_time = get_cpu_idle_time_us(cpu, wall);

        if (idle_time == -1ULL)
                return get_cpu_idle_time_jiffy(cpu, wall);

        return idle_time;
}

/*
 * Find the right freq to be set now with powersave_bias on.
 * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
 * freq_lo, and freq_lo_jiffies in the percpu area for averaging freqs.
 */
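/*
 * Worked example (illustrative numbers only): with powersave_bias = 100
 * (10%), a request for 2,000,000 kHz gives freq_reduc = 200,000 and
 * freq_avg = 1,800,000 kHz.  If the frequency table contains 1,600,000
 * and 2,000,000 kHz, then freq_lo = 1,600,000, freq_hi = 2,000,000, and
 * jiffies_hi works out to half of the sampling window, so the CPU spends
 * half the window at each frequency and averages roughly 1.8 GHz.
 */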
static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
                                          unsigned int freq_next,
                                          unsigned int relation)
{
        unsigned int freq_req, freq_reduc, freq_avg;
        unsigned int freq_hi, freq_lo;
        unsigned int index = 0;
        unsigned int jiffies_total, jiffies_hi, jiffies_lo;
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info,
                                                   policy->cpu);

        if (!dbs_info->freq_table) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_next;
        }

        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
                        relation, &index);
        freq_req = dbs_info->freq_table[index].frequency;
        freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
        freq_avg = freq_req - freq_reduc;

        /* Find freq bounds for freq_avg in freq_table */
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_H, &index);
        freq_lo = dbs_info->freq_table[index].frequency;
        index = 0;
        cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
                        CPUFREQ_RELATION_L, &index);
        freq_hi = dbs_info->freq_table[index].frequency;

        /* Find out how long we have to be in hi and lo freqs */
        if (freq_hi == freq_lo) {
                dbs_info->freq_lo = 0;
                dbs_info->freq_lo_jiffies = 0;
                return freq_lo;
        }
        jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
        jiffies_hi += ((freq_hi - freq_lo) / 2);
        jiffies_hi /= (freq_hi - freq_lo);
        jiffies_lo = jiffies_total - jiffies_hi;
        dbs_info->freq_lo = freq_lo;
        dbs_info->freq_lo_jiffies = jiffies_lo;
        dbs_info->freq_hi_jiffies = jiffies_hi;
        return freq_hi;
}

static void ondemand_powersave_bias_init_cpu(int cpu)
{
        struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu);
        dbs_info->freq_table = cpufreq_frequency_get_table(cpu);
        dbs_info->freq_lo = 0;
}

static void ondemand_powersave_bias_init(void)
{
        int i;
        for_each_online_cpu(i) {
                ondemand_powersave_bias_init_cpu(i);
        }
}

/************************** sysfs interface ************************/

static ssize_t show_sampling_rate_max(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        printk_once(KERN_INFO "CPUFREQ: ondemand sampling_rate_max "
               "sysfs file is deprecated - used by: %s\n", current->comm);
        return sprintf(buf, "%u\n", -1U);
}

static ssize_t show_sampling_rate_min(struct kobject *kobj,
                                      struct attribute *attr, char *buf)
{
        return sprintf(buf, "%u\n", min_sampling_rate);
}

define_one_global_ro(sampling_rate_max);
define_one_global_ro(sampling_rate_min);

/* cpufreq_ondemand Governor Tunables */
#define show_one(file_name, object)                                     \
static ssize_t show_##file_name                                         \
(struct kobject *kobj, struct attribute *attr, char *buf)               \
{                                                                       \
        return sprintf(buf, "%u\n", dbs_tuners_ins.object);             \
}
show_one(sampling_rate, sampling_rate);
show_one(up_threshold, up_threshold);
show_one(ignore_nice_load, ignore_nice);
show_one(powersave_bias, powersave_bias);

/*** delete after deprecation time ***/

#define DEPRECATION_MSG(file_name)                                      \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "       \
                    "interface is deprecated - " #file_name "\n");

#define show_one_old(file_name)                                         \
static ssize_t show_##file_name##_old                                   \
(struct cpufreq_policy *unused, char *buf)                              \
{                                                                       \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "       \
                    "interface is deprecated - " #file_name "\n");      \
        return show_##file_name(NULL, NULL, buf);                       \
}
show_one_old(sampling_rate);
show_one_old(up_threshold);
show_one_old(ignore_nice_load);
show_one_old(powersave_bias);
show_one_old(sampling_rate_min);
show_one_old(sampling_rate_max);

cpufreq_freq_attr_ro_old(sampling_rate_min);
cpufreq_freq_attr_ro_old(sampling_rate_max);

/*** delete after deprecation time ***/

static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b,
                                   const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate);
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_up_threshold(struct kobject *a, struct attribute *b,
                                  const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD ||
                        input < MIN_FREQUENCY_UP_THRESHOLD) {
                return -EINVAL;
        }

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.up_threshold = input;
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b,
                                      const char *buf, size_t count)
{
        unsigned int input;
        int ret;

        unsigned int j;

        ret = sscanf(buf, "%u", &input);
        if (ret != 1)
                return -EINVAL;

        if (input > 1)
                input = 1;

        mutex_lock(&dbs_mutex);
        if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */
                mutex_unlock(&dbs_mutex);
                return count;
        }
        dbs_tuners_ins.ignore_nice = input;

        /* we need to re-evaluate prev_cpu_idle */
        for_each_online_cpu(j) {
                struct cpu_dbs_info_s *dbs_info;
                dbs_info = &per_cpu(od_cpu_dbs_info, j);
                dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &dbs_info->prev_cpu_wall);
                if (dbs_tuners_ins.ignore_nice)
                        dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;

        }
        mutex_unlock(&dbs_mutex);

        return count;
}

static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
                                    const char *buf, size_t count)
{
        unsigned int input;
        int ret;
        ret = sscanf(buf, "%u", &input);

        if (ret != 1)
                return -EINVAL;

        if (input > 1000)
                input = 1000;

        mutex_lock(&dbs_mutex);
        dbs_tuners_ins.powersave_bias = input;
        ondemand_powersave_bias_init();
        mutex_unlock(&dbs_mutex);

        return count;
}

define_one_global_rw(sampling_rate);
define_one_global_rw(up_threshold);
define_one_global_rw(ignore_nice_load);
define_one_global_rw(powersave_bias);

static struct attribute *dbs_attributes[] = {
        &sampling_rate_max.attr,
        &sampling_rate_min.attr,
        &sampling_rate.attr,
        &up_threshold.attr,
        &ignore_nice_load.attr,
        &powersave_bias.attr,
        NULL
};

static struct attribute_group dbs_attr_group = {
        .attrs = dbs_attributes,
        .name = "ondemand",
};

/*** delete after deprecation time ***/

#define write_one_old(file_name)                                        \
static ssize_t store_##file_name##_old                                  \
(struct cpufreq_policy *unused, const char *buf, size_t count)          \
{                                                                       \
        printk_once(KERN_INFO "CPUFREQ: Per core ondemand sysfs "       \
                    "interface is deprecated - " #file_name "\n");      \
        return store_##file_name(NULL, NULL, buf, count);               \
}
write_one_old(sampling_rate);
write_one_old(up_threshold);
write_one_old(ignore_nice_load);
write_one_old(powersave_bias);

cpufreq_freq_attr_rw_old(sampling_rate);
cpufreq_freq_attr_rw_old(up_threshold);
cpufreq_freq_attr_rw_old(ignore_nice_load);
cpufreq_freq_attr_rw_old(powersave_bias);

static struct attribute *dbs_attributes_old[] = {
        &sampling_rate_max_old.attr,
        &sampling_rate_min_old.attr,
        &sampling_rate_old.attr,
        &up_threshold_old.attr,
        &ignore_nice_load_old.attr,
        &powersave_bias_old.attr,
        NULL
};

static struct attribute_group dbs_attr_group_old = {
        .attrs = dbs_attributes_old,
        .name = "ondemand",
};

/*** delete after deprecation time ***/

/************************** sysfs end ************************/

static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
        unsigned int max_load_freq;

        struct cpufreq_policy *policy;
        unsigned int j;

        this_dbs_info->freq_lo = 0;
        policy = this_dbs_info->cur_policy;

        /*
         * Every sampling_rate we check whether the current idle time is
         * less than 20% (default); if it is, we try to increase the
         * frequency.  Every sampling_rate we also look for the lowest
         * frequency which can sustain the load while keeping idle time
         * over 30%. If such a frequency exists, we try to decrease to it.
         *
         * Any frequency increase takes it to the maximum frequency.
         * Frequency reduction happens at minimum steps of
         * 5% (default) of the current frequency.
         */
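
        /*
         * Worked example (illustrative numbers only): with the default
         * up_threshold of 80 and policy->cur = 1,000,000 kHz, the increase
         * condition below is max_load_freq > 80 * 1,000,000.  If one CPU in
         * the policy ran at 90% load while averaging 1,000,000 kHz over the
         * last sample, its load_freq is 90 * 1,000,000, which exceeds the
         * threshold, so the governor jumps straight to policy->max.
         */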

        /* Get Absolute Load - in terms of freq */
        max_load_freq = 0;

        for_each_cpu(j, policy->cpus) {
                struct cpu_dbs_info_s *j_dbs_info;
                cputime64_t cur_wall_time, cur_idle_time;
                unsigned int idle_time, wall_time;
                unsigned int load, load_freq;
                int freq_avg;

                j_dbs_info = &per_cpu(od_cpu_dbs_info, j);

                cur_idle_time = get_cpu_idle_time(j, &cur_wall_time);

                wall_time = (unsigned int) cputime64_sub(cur_wall_time,
                                j_dbs_info->prev_cpu_wall);
                j_dbs_info->prev_cpu_wall = cur_wall_time;

                idle_time = (unsigned int) cputime64_sub(cur_idle_time,
                                j_dbs_info->prev_cpu_idle);
                j_dbs_info->prev_cpu_idle = cur_idle_time;

                if (dbs_tuners_ins.ignore_nice) {
                        cputime64_t cur_nice;
                        unsigned long cur_nice_jiffies;

                        cur_nice = cputime64_sub(kstat_cpu(j).cpustat.nice,
                                         j_dbs_info->prev_cpu_nice);
                        /*
                         * Assumption: nice time between sampling periods will
                         * be less than 2^32 jiffies on 32-bit systems
                         */
                        cur_nice_jiffies = (unsigned long)
                                        cputime64_to_jiffies64(cur_nice);

                        j_dbs_info->prev_cpu_nice = kstat_cpu(j).cpustat.nice;
                        idle_time += jiffies_to_usecs(cur_nice_jiffies);
                }

                if (unlikely(!wall_time || wall_time < idle_time))
                        continue;

                load = 100 * (wall_time - idle_time) / wall_time;

                freq_avg = __cpufreq_driver_getavg(policy, j);
                if (freq_avg <= 0)
                        freq_avg = policy->cur;

                load_freq = load * freq_avg;
                if (load_freq > max_load_freq)
                        max_load_freq = load_freq;
        }

        /* Check for frequency increase */
        if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
                /* if we are already at full speed then break out early */
                if (!dbs_tuners_ins.powersave_bias) {
                        if (policy->cur == policy->max)
                                return;

                        __cpufreq_driver_target(policy, policy->max,
                                CPUFREQ_RELATION_H);
                } else {
                        int freq = powersave_bias_target(policy, policy->max,
                                        CPUFREQ_RELATION_H);
                        __cpufreq_driver_target(policy, freq,
                                CPUFREQ_RELATION_L);
                }
                return;
        }

        /* Check for frequency decrease */
        /* if we cannot reduce the frequency anymore, break out early */
        if (policy->cur == policy->min)
                return;

        /*
         * The optimal frequency is the lowest frequency that can support
         * the current CPU usage without triggering the up policy. To be
         * safe, we aim down_differential points (10 by default) below the
         * up threshold.
         */
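        /*
         * Worked example (illustrative numbers only): with up_threshold = 80
         * and down_differential = 10, the decrease path is taken when
         * max_load_freq < 70 * policy->cur.  If policy->cur = 1,000,000 kHz
         * and max_load_freq = 35 * 1,000,000 (35% load at the current speed),
         * then freq_next = 35,000,000 / 70 = 500,000 kHz; at that frequency
         * the same absolute load corresponds to roughly 70%, i.e. 10 points
         * below the up threshold.
         */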
        if (max_load_freq <
            (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) *
             policy->cur) {
                unsigned int freq_next;
                freq_next = max_load_freq /
                                (dbs_tuners_ins.up_threshold -
                                 dbs_tuners_ins.down_differential);

                if (freq_next < policy->min)
                        freq_next = policy->min;

                if (!dbs_tuners_ins.powersave_bias) {
                        __cpufreq_driver_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                } else {
                        int freq = powersave_bias_target(policy, freq_next,
                                        CPUFREQ_RELATION_L);
                        __cpufreq_driver_target(policy, freq,
                                CPUFREQ_RELATION_L);
                }
        }
}

static void do_dbs_timer(struct work_struct *work)
{
        struct cpu_dbs_info_s *dbs_info =
                container_of(work, struct cpu_dbs_info_s, work.work);
        unsigned int cpu = dbs_info->cpu;
        int sample_type = dbs_info->sample_type;

        /* We want all CPUs to do sampling nearly on the same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);

        delay -= jiffies % delay;
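        /*
         * Example of the alignment above (illustrative): with HZ = 1000 and
         * sampling_rate = 10,000 us, delay is 10 jiffies.  If jiffies % 10 is
         * currently 3, delay becomes 7, so the next expiry lands on a jiffy
         * that is a multiple of 10 and all CPUs sample roughly in phase.
         */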
        mutex_lock(&dbs_info->timer_mutex);

        /* Common NORMAL_SAMPLE setup */
        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        if (!dbs_tuners_ins.powersave_bias ||
            sample_type == DBS_NORMAL_SAMPLE) {
                dbs_check_cpu(dbs_info);
                if (dbs_info->freq_lo) {
                        /* Setup timer for SUB_SAMPLE */
                        dbs_info->sample_type = DBS_SUB_SAMPLE;
                        delay = dbs_info->freq_hi_jiffies;
                }
        } else {
                __cpufreq_driver_target(dbs_info->cur_policy,
                        dbs_info->freq_lo, CPUFREQ_RELATION_H);
        }
        queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
        mutex_unlock(&dbs_info->timer_mutex);
}

static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
        /* We want all CPUs to do sampling nearly on the same jiffy */
        int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
        delay -= jiffies % delay;

        dbs_info->sample_type = DBS_NORMAL_SAMPLE;
        INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer);
        queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work,
                delay);
}
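
/*
 * Note: because the work item is initialized with
 * INIT_DELAYED_WORK_DEFERRABLE above, its timer is deferrable and will not
 * wake an idle CPU; the sample simply runs when the CPU next becomes
 * active.  This is why the sampling interval can stretch while a CPU is
 * idle (see the comment in cpufreq_gov_dbs_init() about skipped samples).
 */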

static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
        cancel_delayed_work_sync(&dbs_info->work);
}

static int cpufreq_governor_dbs(struct cpufreq_policy *policy,
                                   unsigned int event)
{
        unsigned int cpu = policy->cpu;
        struct cpu_dbs_info_s *this_dbs_info;
        unsigned int j;
        int rc;

        this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu);

        switch (event) {
        case CPUFREQ_GOV_START:
                if ((!cpu_online(cpu)) || (!policy->cur))
                        return -EINVAL;

                mutex_lock(&dbs_mutex);

                rc = sysfs_create_group(&policy->kobj, &dbs_attr_group_old);
                if (rc) {
                        mutex_unlock(&dbs_mutex);
                        return rc;
                }

                dbs_enable++;
                for_each_cpu(j, policy->cpus) {
                        struct cpu_dbs_info_s *j_dbs_info;
                        j_dbs_info = &per_cpu(od_cpu_dbs_info, j);
                        j_dbs_info->cur_policy = policy;

                        j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j,
                                                &j_dbs_info->prev_cpu_wall);
                        if (dbs_tuners_ins.ignore_nice) {
                                j_dbs_info->prev_cpu_nice =
                                                kstat_cpu(j).cpustat.nice;
                        }
                }
                this_dbs_info->cpu = cpu;
                ondemand_powersave_bias_init_cpu(cpu);
                /*
                 * Start the timer-scheduled work when this governor
                 * is used for the first time.
                 */
                if (dbs_enable == 1) {
                        unsigned int latency;

                        rc = sysfs_create_group(cpufreq_global_kobject,
                                                &dbs_attr_group);
                        if (rc) {
                                mutex_unlock(&dbs_mutex);
                                return rc;
                        }

                        /* policy latency is in ns. Convert it to us first */
                        latency = policy->cpuinfo.transition_latency / 1000;
                        if (latency == 0)
                                latency = 1;
                        /* Bring kernel and HW constraints together */
                        min_sampling_rate = max(min_sampling_rate,
                                        MIN_LATENCY_MULTIPLIER * latency);
                        dbs_tuners_ins.sampling_rate =
                                max(min_sampling_rate,
                                    latency * LATENCY_MULTIPLIER);
                }
                mutex_unlock(&dbs_mutex);

                mutex_init(&this_dbs_info->timer_mutex);
                dbs_timer_init(this_dbs_info);
                break;

        case CPUFREQ_GOV_STOP:
                dbs_timer_exit(this_dbs_info);

                mutex_lock(&dbs_mutex);
                sysfs_remove_group(&policy->kobj, &dbs_attr_group_old);
                mutex_destroy(&this_dbs_info->timer_mutex);
                dbs_enable--;
                mutex_unlock(&dbs_mutex);
                if (!dbs_enable)
                        sysfs_remove_group(cpufreq_global_kobject,
                                           &dbs_attr_group);

                break;

        case CPUFREQ_GOV_LIMITS:
                mutex_lock(&this_dbs_info->timer_mutex);
                if (policy->max < this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->max, CPUFREQ_RELATION_H);
                else if (policy->min > this_dbs_info->cur_policy->cur)
                        __cpufreq_driver_target(this_dbs_info->cur_policy,
                                policy->min, CPUFREQ_RELATION_L);
                mutex_unlock(&this_dbs_info->timer_mutex);
                break;
        }
        return 0;
}

static int __init cpufreq_gov_dbs_init(void)
{
        int err;
        cputime64_t wall;
        u64 idle_time;
        int cpu = get_cpu();

        idle_time = get_cpu_idle_time_us(cpu, &wall);
        put_cpu();
        if (idle_time != -1ULL) {
                /* Idle micro accounting is supported. Use finer thresholds */
                dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD;
                dbs_tuners_ins.down_differential =
                                        MICRO_FREQUENCY_DOWN_DIFFERENTIAL;
                /*
                 * In the nohz/micro-accounting case we set the minimum
                 * sampling rate to a fixed, very low value that does not
                 * depend on HZ. The deferrable timer may skip some samples
                 * while the CPU is idle/sleeping, as intended.
                 */
                min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
        } else {
                /* For correct statistics, we need 10 ticks for each measure */
                min_sampling_rate =
                        MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10);
        }
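        /*
         * For example (illustrative): with HZ = 250, jiffies_to_usecs(10) is
         * 40,000 us, so the jiffy-based floor is 2 * 40,000 = 80,000 us,
         * whereas with idle micro-accounting the floor is a fixed
         * MICRO_FREQUENCY_MIN_SAMPLE_RATE = 10,000 us.
         */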

        kondemand_wq = create_workqueue("kondemand");
        if (!kondemand_wq) {
                printk(KERN_ERR "Creation of kondemand failed\n");
                return -EFAULT;
        }
        err = cpufreq_register_governor(&cpufreq_gov_ondemand);
        if (err)
                destroy_workqueue(kondemand_wq);

        return err;
}

static void __exit cpufreq_gov_dbs_exit(void)
{
        cpufreq_unregister_governor(&cpufreq_gov_ondemand);
        destroy_workqueue(kondemand_wq);
}

MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>");
MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>");
MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for "
        "Low Latency Frequency Transition capable processors");
MODULE_LICENSE("GPL");

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND
fs_initcall(cpufreq_gov_dbs_init);
#else
module_init(cpufreq_gov_dbs_init);
#endif
module_exit(cpufreq_gov_dbs_exit);