mm/list_lru.c
/*
 * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
 * Authors: David Chinner and Glauber Costa
 *
 * Generic LRU infrastructure
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/list_lru.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/memcontrol.h>

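/*
 * Illustrative usage sketch (comment only, not compiled): a typical user
 * embeds a struct list_lru, initializes it, links objects into it from the
 * cache population/teardown paths and reclaims through a walk callback.
 * The snippet sticks to symbols defined in this file; callers normally go
 * through the list_lru_init()/list_lru_walk() convenience wrappers assumed
 * to live in <linux/list_lru.h> rather than calling __list_lru_init()
 * directly.
 *
 *	static struct list_lru my_lru;
 *	unsigned long nr_to_walk = 128;
 *
 *	err = __list_lru_init(&my_lru, false, NULL);	// not memcg aware
 *	list_lru_add(&my_lru, &obj->lru);	// true if the item was linked
 *	list_lru_walk_node(&my_lru, nid, my_isolate, NULL, &nr_to_walk);
 *	list_lru_del(&my_lru, &obj->lru);	// true if the item was unlinked
 *	list_lru_destroy(&my_lru);
 */
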
#ifdef CONFIG_MEMCG_KMEM
static LIST_HEAD(list_lrus);
static DEFINE_MUTEX(list_lrus_mutex);

static void list_lru_register(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_add(&lru->list, &list_lrus);
        mutex_unlock(&list_lrus_mutex);
}

static void list_lru_unregister(struct list_lru *lru)
{
        mutex_lock(&list_lrus_mutex);
        list_del(&lru->list);
        mutex_unlock(&list_lrus_mutex);
}
#else
static void list_lru_register(struct list_lru *lru)
{
}

static void list_lru_unregister(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

#ifdef CONFIG_MEMCG_KMEM
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        /*
         * This needs node 0 to always be present, even on systems
         * supporting sparse NUMA ids.
         */
        return !!lru->node[0].memcg_lrus;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        /*
         * The lock protects the array of per-cgroup lists from relocation
         * (see memcg_update_list_lru_node).
         */
        lockdep_assert_held(&nlru->lock);
        if (nlru->memcg_lrus && idx >= 0)
                return nlru->memcg_lrus->lru[idx];

        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
        struct mem_cgroup *memcg;

        if (!nlru->memcg_lrus)
                return &nlru->lru;

        memcg = mem_cgroup_from_kmem(ptr);
        if (!memcg)
                return &nlru->lru;

        return list_lru_from_memcg_idx(nlru, memcg_cache_id(memcg));
}
#else
static inline bool list_lru_memcg_aware(struct list_lru *lru)
{
        return false;
}

static inline struct list_lru_one *
list_lru_from_memcg_idx(struct list_lru_node *nlru, int idx)
{
        return &nlru->lru;
}

static inline struct list_lru_one *
list_lru_from_kmem(struct list_lru_node *nlru, void *ptr)
{
        return &nlru->lru;
}
#endif /* CONFIG_MEMCG_KMEM */

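/*
 * list_lru_add() links @item onto the LRU list of the node that backs it
 * (and, for memcg-aware lrus, onto the list of the object's memcg);
 * list_lru_del() below is the symmetric removal.  Both return true only
 * when they actually changed the item's linkage, so racing callers can
 * use the return value to keep external accounting consistent.
 */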
bool list_lru_add(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (list_empty(item)) {
                l = list_lru_from_kmem(nlru, item);
                list_add_tail(item, &l->list);
                l->nr_items++;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_add);

bool list_lru_del(struct list_lru *lru, struct list_head *item)
{
        int nid = page_to_nid(virt_to_page(item));
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;

        spin_lock(&nlru->lock);
        if (!list_empty(item)) {
                l = list_lru_from_kmem(nlru, item);
                list_del_init(item);
                l->nr_items--;
                spin_unlock(&nlru->lock);
                return true;
        }
        spin_unlock(&nlru->lock);
        return false;
}
EXPORT_SYMBOL_GPL(list_lru_del);

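/*
 * The two helpers below are intended to be called from a walk callback,
 * i.e. with the node's lru lock already held: they unlink @item from (or
 * move it off) the list it currently sits on and adjust that list's
 * nr_items counter without taking any locks themselves.
 */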
void list_lru_isolate(struct list_lru_one *list, struct list_head *item)
{
        list_del_init(item);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate);

void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
                           struct list_head *head)
{
        list_move(item, head);
        list->nr_items--;
}
EXPORT_SYMBOL_GPL(list_lru_isolate_move);

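/*
 * Counting: __list_lru_count_one() reads nr_items of one (node, memcg)
 * list under the node lock; list_lru_count_node() sums the node-local
 * list and, for memcg-aware lrus, every per-memcg list on that node.
 */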
static unsigned long __list_lru_count_one(struct list_lru *lru,
                                          int nid, int memcg_idx)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        unsigned long count;

        spin_lock(&nlru->lock);
        l = list_lru_from_memcg_idx(nlru, memcg_idx);
        count = l->nr_items;
        spin_unlock(&nlru->lock);

        return count;
}

unsigned long list_lru_count_one(struct list_lru *lru,
                                 int nid, struct mem_cgroup *memcg)
{
        return __list_lru_count_one(lru, nid, memcg_cache_id(memcg));
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

unsigned long list_lru_count_node(struct list_lru *lru, int nid)
{
        long count = 0;
        int memcg_idx;

        count += __list_lru_count_one(lru, nid, -1);
        if (list_lru_memcg_aware(lru)) {
                for_each_memcg_cache_index(memcg_idx)
                        count += __list_lru_count_one(lru, nid, memcg_idx);
        }
        return count;
}
EXPORT_SYMBOL_GPL(list_lru_count_node);

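/*
 * Walking: the callback is invoked for each item with the node's lru lock
 * held and decides the item's fate via enum lru_status.  LRU_REMOVED and
 * LRU_REMOVED_RETRY count as an isolation; LRU_REMOVED_RETRY and LRU_RETRY
 * additionally signal that the callback dropped and re-took the lock, so
 * the traversal restarts from the list head.  nr_to_walk is decremented
 * once per visited item, which bounds the work even across restarts.
 */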
static unsigned long
__list_lru_walk_one(struct list_lru *lru, int nid, int memcg_idx,
                    list_lru_walk_cb isolate, void *cb_arg,
                    unsigned long *nr_to_walk)
{
        struct list_lru_node *nlru = &lru->node[nid];
        struct list_lru_one *l;
        struct list_head *item, *n;
        unsigned long isolated = 0;

        spin_lock(&nlru->lock);
        l = list_lru_from_memcg_idx(nlru, memcg_idx);
restart:
        list_for_each_safe(item, n, &l->list) {
                enum lru_status ret;

                /*
                 * decrement nr_to_walk first so that we don't livelock if we
                 * get stuck on large numbers of LRU_RETRY items
                 */
                if (!*nr_to_walk)
                        break;
                --*nr_to_walk;

                ret = isolate(item, l, &nlru->lock, cb_arg);
                switch (ret) {
                case LRU_REMOVED_RETRY:
                        assert_spin_locked(&nlru->lock);
                        /* fall through */
                case LRU_REMOVED:
                        isolated++;
                        /*
                         * If the lru lock has been dropped, our list
                         * traversal is now invalid and so we have to
                         * restart from scratch.
                         */
                        if (ret == LRU_REMOVED_RETRY)
                                goto restart;
                        break;
                case LRU_ROTATE:
                        list_move_tail(item, &l->list);
                        break;
                case LRU_SKIP:
                        break;
                case LRU_RETRY:
                        /*
                         * The lru lock has been dropped, our list traversal is
                         * now invalid and so we have to restart from scratch.
                         */
                        assert_spin_locked(&nlru->lock);
                        goto restart;
                default:
                        BUG();
                }
        }

        spin_unlock(&nlru->lock);
        return isolated;
}

unsigned long
list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
                  list_lru_walk_cb isolate, void *cb_arg,
                  unsigned long *nr_to_walk)
{
        return __list_lru_walk_one(lru, nid, memcg_cache_id(memcg),
                                   isolate, cb_arg, nr_to_walk);
}
EXPORT_SYMBOL_GPL(list_lru_walk_one);

unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
                                 list_lru_walk_cb isolate, void *cb_arg,
                                 unsigned long *nr_to_walk)
{
        long isolated = 0;
        int memcg_idx;

        isolated += __list_lru_walk_one(lru, nid, -1, isolate, cb_arg,
                                        nr_to_walk);
        if (*nr_to_walk > 0 && list_lru_memcg_aware(lru)) {
                for_each_memcg_cache_index(memcg_idx) {
                        isolated += __list_lru_walk_one(lru, nid, memcg_idx,
                                                isolate, cb_arg, nr_to_walk);
                        if (*nr_to_walk <= 0)
                                break;
                }
        }
        return isolated;
}
EXPORT_SYMBOL_GPL(list_lru_walk_node);

static void init_one_lru(struct list_lru_one *l)
{
        INIT_LIST_HEAD(&l->list);
        l->nr_items = 0;
}

#ifdef CONFIG_MEMCG_KMEM
static void __memcg_destroy_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                          int begin, int end)
{
        int i;

        for (i = begin; i < end; i++)
                kfree(memcg_lrus->lru[i]);
}

static int __memcg_init_list_lru_node(struct list_lru_memcg *memcg_lrus,
                                      int begin, int end)
{
        int i;

        for (i = begin; i < end; i++) {
                struct list_lru_one *l;

                l = kmalloc(sizeof(struct list_lru_one), GFP_KERNEL);
                if (!l)
                        goto fail;

                init_one_lru(l);
                memcg_lrus->lru[i] = l;
        }
        return 0;
fail:
        /* free the lists allocated so far; 'end' is exclusive */
        __memcg_destroy_list_lru_node(memcg_lrus, begin, i);
        return -ENOMEM;
}

static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
        int size = memcg_nr_cache_ids;

        nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
        if (!nlru->memcg_lrus)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
                kfree(nlru->memcg_lrus);
                return -ENOMEM;
        }

        return 0;
}

static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
        __memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
        kfree(nlru->memcg_lrus);
}

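/*
 * Resizing on memcg id growth: a bigger array is allocated and its new
 * slots initialized outside the lock, the old pointers are copied over,
 * and only the pointer swap happens under the irq-safe node lock, so a
 * reader holding that lock never observes a half-built array.
 */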
static int memcg_update_list_lru_node(struct list_lru_node *nlru,
                                      int old_size, int new_size)
{
        struct list_lru_memcg *old, *new;

        BUG_ON(old_size > new_size);

        old = nlru->memcg_lrus;
        new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
        if (!new)
                return -ENOMEM;

        if (__memcg_init_list_lru_node(new, old_size, new_size)) {
                kfree(new);
                return -ENOMEM;
        }

        memcpy(new, old, old_size * sizeof(void *));

        /*
         * The lock guarantees that we won't race with a reader
         * (see list_lru_from_memcg_idx).
         *
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);
        nlru->memcg_lrus = new;
        spin_unlock_irq(&nlru->lock);

        kfree(old);
        return 0;
}

static void memcg_cancel_update_list_lru_node(struct list_lru_node *nlru,
                                              int old_size, int new_size)
{
        /*
         * Do not bother shrinking the array back to the old size, because
         * we cannot handle allocation failures here.
         */
        __memcg_destroy_list_lru_node(nlru->memcg_lrus, old_size, new_size);
}

static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        int i;

        if (!memcg_aware)
                return 0;

        for_each_node(i) {
                if (memcg_init_list_lru_node(&lru->node[i]))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;
                memcg_destroy_list_lru_node(&lru->node[i]);
        }
        return -ENOMEM;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_destroy_list_lru_node(&lru->node[i]);
}

static int memcg_update_list_lru(struct list_lru *lru,
                                 int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return 0;

        for_each_node(i) {
                if (memcg_update_list_lru_node(&lru->node[i],
                                               old_size, new_size))
                        goto fail;
        }
        return 0;
fail:
        for (i = i - 1; i >= 0; i--) {
                if (!lru->node[i].memcg_lrus)
                        continue;

                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
        }
        return -ENOMEM;
}

static void memcg_cancel_update_list_lru(struct list_lru *lru,
                                         int old_size, int new_size)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_cancel_update_list_lru_node(&lru->node[i],
                                                  old_size, new_size);
}

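/*
 * memcg_update_all_list_lrus() grows every registered lru to the new
 * number of memcg cache ids under list_lrus_mutex; on failure it walks
 * back over the lrus already grown and frees the freshly added slots,
 * leaving everything at the old size.
 */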
int memcg_update_all_list_lrus(int new_size)
{
        int ret = 0;
        struct list_lru *lru;
        int old_size = memcg_nr_cache_ids;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list) {
                ret = memcg_update_list_lru(lru, old_size, new_size);
                if (ret)
                        goto fail;
        }
out:
        mutex_unlock(&list_lrus_mutex);
        return ret;
fail:
        list_for_each_entry_continue_reverse(lru, &list_lrus, list)
                memcg_cancel_update_list_lru(lru, old_size, new_size);
        goto out;
}

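/*
 * Draining: when a memcg goes away, whatever items it still has on each
 * registered lru are spliced, per node and under the irq-safe node lock,
 * onto the list of the destination index (typically the parent cgroup's),
 * so no object ever disappears from the lru while the source list is
 * being emptied.
 */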
static void memcg_drain_list_lru_node(struct list_lru_node *nlru,
                                      int src_idx, int dst_idx)
{
        struct list_lru_one *src, *dst;

        /*
         * Since list_lru_{add,del} may be called under an IRQ-safe lock,
         * we have to use IRQ-safe primitives here to avoid deadlock.
         */
        spin_lock_irq(&nlru->lock);

        src = list_lru_from_memcg_idx(nlru, src_idx);
        dst = list_lru_from_memcg_idx(nlru, dst_idx);

        list_splice_init(&src->list, &dst->list);
        dst->nr_items += src->nr_items;
        src->nr_items = 0;

        spin_unlock_irq(&nlru->lock);
}

static void memcg_drain_list_lru(struct list_lru *lru,
                                 int src_idx, int dst_idx)
{
        int i;

        if (!list_lru_memcg_aware(lru))
                return;

        for_each_node(i)
                memcg_drain_list_lru_node(&lru->node[i], src_idx, dst_idx);
}

void memcg_drain_all_list_lrus(int src_idx, int dst_idx)
{
        struct list_lru *lru;

        mutex_lock(&list_lrus_mutex);
        list_for_each_entry(lru, &list_lrus, list)
                memcg_drain_list_lru(lru, src_idx, dst_idx);
        mutex_unlock(&list_lrus_mutex);
}
#else
static int memcg_init_list_lru(struct list_lru *lru, bool memcg_aware)
{
        return 0;
}

static void memcg_destroy_list_lru(struct list_lru *lru)
{
}
#endif /* CONFIG_MEMCG_KMEM */

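/*
 * Initialization and teardown.  __list_lru_init() allocates one
 * list_lru_node per node id (plus the per-memcg arrays when @memcg_aware)
 * and registers the lru for future memcg id updates; the
 * memcg_get_cache_ids()/memcg_put_cache_ids() pair keeps
 * memcg_nr_cache_ids stable while the arrays are sized.  Callers normally
 * reach this through the list_lru_init*() wrappers in <linux/list_lru.h>.
 * list_lru_destroy() is safe to call on an lru that was never initialized,
 * since lru->node is then NULL.
 */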
int __list_lru_init(struct list_lru *lru, bool memcg_aware,
                    struct lock_class_key *key)
{
        int i;
        size_t size = sizeof(*lru->node) * nr_node_ids;
        int err = -ENOMEM;

        memcg_get_cache_ids();

        lru->node = kzalloc(size, GFP_KERNEL);
        if (!lru->node)
                goto out;

        for_each_node(i) {
                spin_lock_init(&lru->node[i].lock);
                if (key)
                        lockdep_set_class(&lru->node[i].lock, key);
                init_one_lru(&lru->node[i].lru);
        }

        err = memcg_init_list_lru(lru, memcg_aware);
        if (err) {
                kfree(lru->node);
                goto out;
        }

        list_lru_register(lru);
out:
        memcg_put_cache_ids();
        return err;
}
EXPORT_SYMBOL_GPL(__list_lru_init);

void list_lru_destroy(struct list_lru *lru)
{
        /* Already destroyed or not yet initialized? */
        if (!lru->node)
                return;

        memcg_get_cache_ids();

        list_lru_unregister(lru);

        memcg_destroy_list_lru(lru);
        kfree(lru->node);
        lru->node = NULL;

        memcg_put_cache_ids();
}
EXPORT_SYMBOL_GPL(list_lru_destroy);