mm: memcontrol: lockless page counters
mm/page_counter.c
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

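/*
 * For reference: the fields manipulated below.  The authoritative
 * definition lives in <linux/page_counter.h>; this is only a sketch
 * of the layout this file assumes:
 *
 *	struct page_counter {
 *		atomic_long_t count;
 *		unsigned long limit;
 *		struct page_counter *parent;
 *		unsigned long watermark;	(maximum count ever seen)
 *		unsigned long failcnt;		(number of failed charges)
 *	};
 */
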
/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 *
 * Returns whether there are remaining pages in the counter.
 */
int page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
	long new;

	new = atomic_long_sub_return(nr_pages, &counter->count);

	/* More uncharges than charges? */
	WARN_ON_ONCE(new < 0);

	return new > 0;
}
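
/*
 * Example (hypothetical caller, illustration only): backing out a
 * speculative charge from a single level without walking the
 * hierarchy.  'group' and group_became_empty() are made-up names:
 *
 *	if (!page_counter_cancel(&group->counter, nr_pages))
 *		group_became_empty(group);
 *
 * The return value refers to this one counter only; ancestors are
 * not touched.
 */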

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;

		new = atomic_long_add_return(nr_pages, &c->count);
		/*
		 * This is indeed racy, but we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
}
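
/*
 * Example (illustration only): force-charging from a path that must
 * not fail, e.g. when the pages in question are already in use and
 * merely being re-accounted.  'group' is a made-up name:
 *
 *	page_counter_charge(&group->counter, nr_pages);
 *
 * Unlike page_counter_try_charge(), this can drive count past limit;
 * the caller is expected to deal with the excess, typically through
 * reclaim after the fact.
 */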

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points to the first counter to hit its limit, if any
 *
 * Returns 0 on success, or -ENOMEM and @fail if the counter or one of
 * its ancestors has hit its configured limit.
 */
int page_counter_try_charge(struct page_counter *counter,
			    unsigned long nr_pages,
			    struct page_counter **fail)
{
	struct page_counter *c;

	for (c = counter; c; c = c->parent) {
		long new;
		/*
		 * Charge speculatively to avoid an expensive CAS.  If
		 * a bigger charge fails, it might falsely lock out a
		 * racing smaller charge and send it into reclaim
		 * early, but the error is limited to the difference
		 * between the two sizes, which is less than 2M/4M in
		 * case of a THP locking out a regular page charge.
		 *
		 * The atomic_long_add_return() implies a full memory
		 * barrier between incrementing the count and reading
		 * the limit.  When racing with page_counter_limit(),
		 * we either see the new limit or the setter sees the
		 * counter has changed and retries.
		 */
		new = atomic_long_add_return(nr_pages, &c->count);
		if (new > c->limit) {
			atomic_long_sub(nr_pages, &c->count);
			/*
			 * This is racy, but we can live with some
			 * inaccuracy in the failcnt.
			 */
			c->failcnt++;
			*fail = c;
			goto failed;
		}
		/*
		 * Just like with failcnt, we can live with some
		 * inaccuracy in the watermark.
		 */
		if (new > c->watermark)
			c->watermark = new;
	}
	return 0;

failed:
	for (c = counter; c != *fail; c = c->parent)
		page_counter_cancel(c, nr_pages);

	return -ENOMEM;
}
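
/*
 * Example (illustration only): the expected charge-or-reclaim pattern
 * in a hypothetical controller.  @fail points at the level that hit
 * its limit, which is the natural target for reclaim;
 * reclaim_pages_from() is a made-up helper:
 *
 *	struct page_counter *fail;
 *
 *	while (page_counter_try_charge(&group->counter, nr_pages, &fail)) {
 *		if (!reclaim_pages_from(fail, nr_pages))
 *			return -ENOMEM;
 *	}
 */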

/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 *
 * Returns whether there are remaining charges in @counter.
 */
int page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
	struct page_counter *c;
	int ret = 1;

	for (c = counter; c; c = c->parent) {
		int remainder;

		remainder = page_counter_cancel(c, nr_pages);
		/* Only the local counter's remainder is reported */
		if (c == counter && !remainder)
			ret = 0;
	}

	return ret;
}
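
/*
 * Example (illustration only): uncharging mirrors a successful charge
 * and must pass the same number of pages.  finalize_group() is a
 * made-up name:
 *
 *	if (!page_counter_uncharge(&group->counter, nr_pages))
 *		finalize_group(group);
 *
 * The return value reports on @counter alone; the ancestors are
 * uncharged as well but not reported on.
 */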

/**
 * page_counter_limit - limit the number of pages allowed
 * @counter: counter
 * @limit: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_limit(struct page_counter *counter, unsigned long limit)
{
	for (;;) {
		unsigned long old;
		long count;

		/*
		 * Update the limit while making sure that it's not
		 * below the concurrently-changing counter value.
		 *
		 * The xchg implies two full memory barriers before
		 * and after, so the read-swap-read is ordered and
		 * ensures coherency with page_counter_try_charge():
		 * that function modifies the count before checking
		 * the limit, so if it sees the old limit, we see the
		 * modified counter and retry.
		 */
		count = atomic_long_read(&counter->count);

		if (count > limit)
			return -EBUSY;

		old = xchg(&counter->limit, limit);

		if (atomic_long_read(&counter->count) <= count)
			return 0;

		counter->limit = old;
		cond_resched();
	}
}
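
/*
 * Example (illustration only): a limit-file write handler.  The
 * required serialization between limit writers is the caller's job,
 * sketched here with a hypothetical 'group->limit_mutex':
 *
 *	mutex_lock(&group->limit_mutex);
 *	ret = page_counter_limit(&group->counter, new_limit);
 *	mutex_unlock(&group->limit_mutex);
 *
 * -EBUSY means usage already exceeds the requested limit; the caller
 * may reclaim and retry, or pass the error on to userspace.
 */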

/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success.  @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, unsigned long *nr_pages)
{
	char unlimited[] = "-1";
	char *end;
	u64 bytes;

	/* sizeof() includes the terminating NUL, so "-1" must match exactly */
	if (!strncmp(buf, unlimited, sizeof(unlimited))) {
		*nr_pages = PAGE_COUNTER_MAX;
		return 0;
	}

	bytes = memparse(buf, &end);
	if (*end != '\0')
		return -EINVAL;

	*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

	return 0;
}
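
/*
 * Example (illustration only): parsing a limit written by userspace
 * and applying it.  'group' is a made-up name; strstrip() trims
 * surrounding whitespace:
 *
 *	unsigned long nr_pages;
 *	int err;
 *
 *	err = page_counter_memparse(strstrip(buf), &nr_pages);
 *	if (err)
 *		return err;
 *	err = page_counter_limit(&group->counter, nr_pages);
 *
 * "-1" maps to PAGE_COUNTER_MAX ("unlimited"); byte values are
 * converted to pages and clamped to PAGE_COUNTER_MAX.
 */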