sched: group scheduling, change how cpu load is calculated
[cascardo/linux.git] / kernel / sched_rt.c
1 /*
2  * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
3  * policies)
4  */
5
6 /*
7  * Update the current task's runtime statistics. Skip current tasks that
8  * are not in our scheduling class.
9  */
10 static void update_curr_rt(struct rq *rq)
11 {
12         struct task_struct *curr = rq->curr;
13         u64 delta_exec;
14
15         if (!task_has_rt_policy(curr))
16                 return;
17
18         delta_exec = rq->clock - curr->se.exec_start;
19         if (unlikely((s64)delta_exec < 0))
20                 delta_exec = 0;
21
22         schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
23
24         curr->se.sum_exec_runtime += delta_exec;
25         curr->se.exec_start = rq->clock;
26         cpuacct_charge(curr, delta_exec);
27 }
28
29 static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
30 {
31         struct rt_prio_array *array = &rq->rt.active;
32
33         list_add_tail(&p->run_list, array->queue + p->prio);
34         __set_bit(p->prio, array->bitmap);
35         inc_cpu_load(rq, p->se.load.weight);
36 }
37
38 /*
39  * Adding/removing a task to/from a priority array:
40  */
41 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
42 {
43         struct rt_prio_array *array = &rq->rt.active;
44
45         update_curr_rt(rq);
46
47         list_del(&p->run_list);
48         if (list_empty(array->queue + p->prio))
49                 __clear_bit(p->prio, array->bitmap);
50         dec_cpu_load(rq, p->se.load.weight);
51 }
52
53 /*
54  * Put task to the end of the run list without the overhead of dequeue
55  * followed by enqueue.
56  */
57 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
58 {
59         struct rt_prio_array *array = &rq->rt.active;
60
61         list_move_tail(&p->run_list, array->queue + p->prio);
62 }
63
64 static void
65 yield_task_rt(struct rq *rq)
66 {
67         requeue_task_rt(rq, rq->curr);
68 }
69
70 /*
71  * Preempt the current task with a newly woken task if needed:
72  */
73 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
74 {
75         if (p->prio < rq->curr->prio)
76                 resched_task(rq->curr);
77 }
78
79 static struct task_struct *pick_next_task_rt(struct rq *rq)
80 {
81         struct rt_prio_array *array = &rq->rt.active;
82         struct task_struct *next;
83         struct list_head *queue;
84         int idx;
85
86         idx = sched_find_first_bit(array->bitmap);
87         if (idx >= MAX_RT_PRIO)
88                 return NULL;
89
90         queue = array->queue + idx;
91         next = list_entry(queue->next, struct task_struct, run_list);
92
93         next->se.exec_start = rq->clock;
94
95         return next;
96 }
97
98 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
99 {
100         update_curr_rt(rq);
101         p->se.exec_start = 0;
102 }
103
104 #ifdef CONFIG_SMP
105 /*
106  * Load-balancing iterator. Note: while the runqueue stays locked
107  * during the whole iteration, the current task might be
108  * dequeued so the iterator has to be dequeue-safe. Here we
109  * achieve that by always pre-iterating before returning
110  * the current task:
111  */
112 static struct task_struct *load_balance_start_rt(void *arg)
113 {
114         struct rq *rq = arg;
115         struct rt_prio_array *array = &rq->rt.active;
116         struct list_head *head, *curr;
117         struct task_struct *p;
118         int idx;
119
120         idx = sched_find_first_bit(array->bitmap);
121         if (idx >= MAX_RT_PRIO)
122                 return NULL;
123
124         head = array->queue + idx;
125         curr = head->prev;
126
127         p = list_entry(curr, struct task_struct, run_list);
128
129         curr = curr->prev;
130
131         rq->rt.rt_load_balance_idx = idx;
132         rq->rt.rt_load_balance_head = head;
133         rq->rt.rt_load_balance_curr = curr;
134
135         return p;
136 }
137
138 static struct task_struct *load_balance_next_rt(void *arg)
139 {
140         struct rq *rq = arg;
141         struct rt_prio_array *array = &rq->rt.active;
142         struct list_head *head, *curr;
143         struct task_struct *p;
144         int idx;
145
146         idx = rq->rt.rt_load_balance_idx;
147         head = rq->rt.rt_load_balance_head;
148         curr = rq->rt.rt_load_balance_curr;
149
150         /*
151          * If we arrived back to the head again then
152          * iterate to the next queue (if any):
153          */
154         if (unlikely(head == curr)) {
155                 int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
156
157                 if (next_idx >= MAX_RT_PRIO)
158                         return NULL;
159
160                 idx = next_idx;
161                 head = array->queue + idx;
162                 curr = head->prev;
163
164                 rq->rt.rt_load_balance_idx = idx;
165                 rq->rt.rt_load_balance_head = head;
166         }
167
168         p = list_entry(curr, struct task_struct, run_list);
169
170         curr = curr->prev;
171
172         rq->rt.rt_load_balance_curr = curr;
173
174         return p;
175 }
176
177 static unsigned long
178 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
179                 unsigned long max_load_move,
180                 struct sched_domain *sd, enum cpu_idle_type idle,
181                 int *all_pinned, int *this_best_prio)
182 {
183         struct rq_iterator rt_rq_iterator;
184
185         rt_rq_iterator.start = load_balance_start_rt;
186         rt_rq_iterator.next = load_balance_next_rt;
187         /* pass 'busiest' rq argument into
188          * load_balance_[start|next]_rt iterators
189          */
190         rt_rq_iterator.arg = busiest;
191
192         return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
193                              idle, all_pinned, this_best_prio, &rt_rq_iterator);
194 }
195
196 static int
197 move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
198                  struct sched_domain *sd, enum cpu_idle_type idle)
199 {
200         struct rq_iterator rt_rq_iterator;
201
202         rt_rq_iterator.start = load_balance_start_rt;
203         rt_rq_iterator.next = load_balance_next_rt;
204         rt_rq_iterator.arg = busiest;
205
206         return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
207                                   &rt_rq_iterator);
208 }
209 #endif
210
211 static void task_tick_rt(struct rq *rq, struct task_struct *p)
212 {
213         update_curr_rt(rq);
214
215         /*
216          * RR tasks need a special form of timeslice management.
217          * FIFO tasks have no timeslices.
218          */
219         if (p->policy != SCHED_RR)
220                 return;
221
222         if (--p->time_slice)
223                 return;
224
225         p->time_slice = DEF_TIMESLICE;
226
227         /*
228          * Requeue to the end of queue if we are not the only element
229          * on the queue:
230          */
231         if (p->run_list.prev != p->run_list.next) {
232                 requeue_task_rt(rq, p);
233                 set_tsk_need_resched(p);
234         }
235 }
236
237 static void set_curr_task_rt(struct rq *rq)
238 {
239         struct task_struct *p = rq->curr;
240
241         p->se.exec_start = rq->clock;
242 }
243
244 const struct sched_class rt_sched_class = {
245         .next                   = &fair_sched_class,
246         .enqueue_task           = enqueue_task_rt,
247         .dequeue_task           = dequeue_task_rt,
248         .yield_task             = yield_task_rt,
249
250         .check_preempt_curr     = check_preempt_curr_rt,
251
252         .pick_next_task         = pick_next_task_rt,
253         .put_prev_task          = put_prev_task_rt,
254
255 #ifdef CONFIG_SMP
256         .load_balance           = load_balance_rt,
257         .move_one_task          = move_one_task_rt,
258 #endif
259
260         .set_curr_task          = set_curr_task_rt,
261         .task_tick              = task_tick_rt,
262 };