/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code       <alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/ksm.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/*
 * For a prot_numa update we only hold mmap_sem for read so there is a
 * potential race with faulting where a pmd was temporarily none. This
 * function checks for a transhuge pmd under the appropriate lock. It
 * returns a pte if it was successfully locked or NULL if it raced with
 * a transhuge insertion.
 */
static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
                        unsigned long addr, int prot_numa, spinlock_t **ptl)
{
        pte_t *pte;
        spinlock_t *pmdl;

        /* !prot_numa is protected by mmap_sem held for write */
        if (!prot_numa)
                return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);

        pmdl = pmd_lock(vma->vm_mm, pmd);
        if (unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd))) {
                spin_unlock(pmdl);
                return NULL;
        }

        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
        spin_unlock(pmdl);
        return pte;
}

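/*
 * Walk the ptes mapping [addr, end) and apply newprot under the page
 * table lock. For a prot_numa update, present non-KSM ptes are marked
 * for NUMA hinting faults instead. Returns the number of ptes updated.
 */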
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa)
{
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte, oldpte;
        spinlock_t *ptl;
        unsigned long pages = 0;

        pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
        if (!pte)
                return 0;

        arch_enter_lazy_mmu_mode();
        do {
                oldpte = *pte;
                if (pte_present(oldpte)) {
                        pte_t ptent;
                        bool updated = false;

                        if (!prot_numa) {
                                ptent = ptep_modify_prot_start(mm, addr, pte);
                                if (pte_numa(ptent))
                                        ptent = pte_mknonnuma(ptent);
                                ptent = pte_modify(ptent, newprot);
                                /*
                                 * Avoid taking write faults for pages we
                                 * know to be dirty.
                                 */
                                if (dirty_accountable && pte_dirty(ptent) &&
                                    (pte_soft_dirty(ptent) ||
                                     !(vma->vm_flags & VM_SOFTDIRTY)))
                                        ptent = pte_mkwrite(ptent);
                                ptep_modify_prot_commit(mm, addr, pte, ptent);
                                updated = true;
                        } else {
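                                /*
                                 * Lazily mark the pte for a NUMA hinting
                                 * fault so the next access is caught by
                                 * the NUMA balancer. Skip KSM pages,
                                 * which may be shared by unrelated
                                 * processes.
                                 */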
                                struct page *page;

                                page = vm_normal_page(vma, addr, oldpte);
                                if (page && !PageKsm(page)) {
                                        if (!pte_numa(oldpte)) {
                                                ptep_set_numa(mm, addr, pte);
                                                updated = true;
                                        }
                                }
                        }
                        if (updated)
                                pages++;
                } else if (IS_ENABLED(CONFIG_MIGRATION) && !pte_file(oldpte)) {
                        swp_entry_t entry = pte_to_swp_entry(oldpte);

                        if (is_write_migration_entry(entry)) {
                                pte_t newpte;
                                /*
                                 * A protection check is difficult so
                                 * just be safe and disable write
                                 */
                                make_migration_entry_read(&entry);
                                newpte = swp_entry_to_pte(entry);
                                if (pte_swp_soft_dirty(oldpte))
                                        newpte = pte_swp_mksoft_dirty(newpte);
                                set_pte_at(mm, addr, pte, newpte);

                                pages++;
                        }
                }
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
        pte_unmap_unlock(pte - 1, ptl);

        return pages;
}

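/*
 * Walk the pmds covering [addr, end), updating transparent huge pmds in
 * place where possible and falling back to change_pte_range() otherwise.
 * Returns the number of page table entries updated.
 */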
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
                pud_t *pud, unsigned long addr, unsigned long end,
                pgprot_t newprot, int dirty_accountable, int prot_numa)
{
        pmd_t *pmd;
        struct mm_struct *mm = vma->vm_mm;
        unsigned long next;
        unsigned long pages = 0;
        unsigned long nr_huge_updates = 0;
        unsigned long mni_start = 0;

        pmd = pmd_offset(pud, addr);
        do {
                unsigned long this_pages;

                next = pmd_addr_end(addr, end);
                if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
                        continue;

                /* invoke the mmu notifier if the pmd is populated */
                if (!mni_start) {
                        mni_start = addr;
                        mmu_notifier_invalidate_range_start(mm, mni_start, end);
                }

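                /*
                 * A transparent huge pmd can be updated in one go, but
                 * only if the range covers the whole huge page;
                 * otherwise split it and fall back to the pte walk.
                 */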
                if (pmd_trans_huge(*pmd)) {
                        if (next - addr != HPAGE_PMD_SIZE)
                                split_huge_page_pmd(vma, addr, pmd);
                        else {
                                int nr_ptes = change_huge_pmd(vma, pmd, addr,
                                                newprot, prot_numa);

                                if (nr_ptes) {
                                        if (nr_ptes == HPAGE_PMD_NR) {
                                                pages += HPAGE_PMD_NR;
                                                nr_huge_updates++;
                                        }

                                        /* huge pmd was handled */
                                        continue;
                                }
                        }
                        /* fall through, the trans huge pmd was just split */
                }
                this_pages = change_pte_range(vma, pmd, addr, next, newprot,
                                 dirty_accountable, prot_numa);
                pages += this_pages;
        } while (pmd++, addr = next, addr != end);

        if (mni_start)
                mmu_notifier_invalidate_range_end(mm, mni_start, end);

        if (nr_huge_updates)
                count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
        return pages;
}

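/*
 * Walk the puds covering [addr, end), recursing into change_pmd_range()
 * for each populated pud. Returns the number of entries updated.
 */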
static inline unsigned long change_pud_range(struct vm_area_struct *vma,
                pgd_t *pgd, unsigned long addr, unsigned long end,
                pgprot_t newprot, int dirty_accountable, int prot_numa)
{
        pud_t *pud;
        unsigned long next;
        unsigned long pages = 0;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                pages += change_pmd_range(vma, pud, addr, next, newprot,
                                 dirty_accountable, prot_numa);
        } while (pud++, addr = next, addr != end);

        return pages;
}

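/*
 * Top level of the page table walk: flush caches, walk the pgds covering
 * [addr, end) and flush the TLB afterwards if any entries were actually
 * changed. Returns the number of entries updated.
 */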
static unsigned long change_protection_range(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end, pgprot_t newprot,
                int dirty_accountable, int prot_numa)
{
        struct mm_struct *mm = vma->vm_mm;
        pgd_t *pgd;
        unsigned long next;
        unsigned long start = addr;
        unsigned long pages = 0;

        BUG_ON(addr >= end);
        pgd = pgd_offset(mm, addr);
        flush_cache_range(vma, addr, end);
        set_tlb_flush_pending(mm);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                pages += change_pud_range(vma, pgd, addr, next, newprot,
                                 dirty_accountable, prot_numa);
        } while (pgd++, addr = next, addr != end);

        /* Only flush the TLB if we actually modified any entries: */
        if (pages)
                flush_tlb_range(vma, start, end);
        clear_tlb_flush_pending(mm);

        return pages;
}

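/*
 * Apply newprot to [start, end) of @vma, dispatching to the hugetlb
 * variant when needed. Returns the number of entries updated.
 */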
unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
                       unsigned long end, pgprot_t newprot,
                       int dirty_accountable, int prot_numa)
{
        unsigned long pages;

        if (is_vm_hugetlb_page(vma))
                pages = hugetlb_change_protection(vma, start, end, newprot);
        else
                pages = change_protection_range(vma, start, end, newprot,
                                dirty_accountable, prot_numa);

        return pages;
}

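/*
 * Change the protection of [start, end), which must lie within @vma, to
 * @newflags: update commit accounting for private writable mappings,
 * merge or split the vma as needed and apply the new page protections.
 * Called with mmap_sem held for write.
 */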
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
        unsigned long start, unsigned long end, unsigned long newflags)
{
        struct mm_struct *mm = vma->vm_mm;
        unsigned long oldflags = vma->vm_flags;
        long nrpages = (end - start) >> PAGE_SHIFT;
        unsigned long charged = 0;
        pgoff_t pgoff;
        int error;
        int dirty_accountable = 0;

        if (newflags == oldflags) {
                *pprev = vma;
                return 0;
        }

        /*
         * If we make a private mapping writable we increase our commit;
         * but (without finer accounting) cannot reduce our commit if we
         * make it unwritable again. hugetlb mappings were accounted for
         * even if read-only, so there is no need to account for them here.
         */
        if (newflags & VM_WRITE) {
                if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
                                                VM_SHARED|VM_NORESERVE))) {
                        charged = nrpages;
                        if (security_vm_enough_memory_mm(mm, charged))
                                return -ENOMEM;
                        newflags |= VM_ACCOUNT;
                }
        }

        /*
         * First try to merge with previous and/or next vma.
         */
        pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
        *pprev = vma_merge(mm, *pprev, start, end, newflags,
                        vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
        if (*pprev) {
                vma = *pprev;
                goto success;
        }

        *pprev = vma;

        if (start != vma->vm_start) {
                error = split_vma(mm, vma, start, 1);
                if (error)
                        goto fail;
        }

        if (end != vma->vm_end) {
                error = split_vma(mm, vma, end, 0);
                if (error)
                        goto fail;
        }

success:
        /*
         * vm_flags and vm_page_prot are protected by the mmap_sem
         * held in write mode.
         */
        vma->vm_flags = newflags;
        dirty_accountable = vma_wants_writenotify(vma);
        vma_set_page_prot(vma);

        change_protection(vma, start, end, vma->vm_page_prot,
                          dirty_accountable, 0);

        vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
        vm_stat_account(mm, newflags, vma->vm_file, nrpages);
        perf_event_mmap(vma);
        return 0;

fail:
        vm_unacct_memory(charged);
        return error;
}

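/*
 * The mprotect(2) entry point: validate the arguments, translate prot
 * into vm_flags and apply the change to every vma covering
 * [start, start + len), failing with -ENOMEM if the range has a hole.
 */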
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
                unsigned long, prot)
{
        unsigned long vm_flags, nstart, end, tmp, reqprot;
        struct vm_area_struct *vma, *prev;
        int error = -EINVAL;
        const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);

        prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
        if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
                return -EINVAL;

        if (start & ~PAGE_MASK)
                return -EINVAL;
        if (!len)
                return 0;
        len = PAGE_ALIGN(len);
        end = start + len;
        if (end <= start)
                return -ENOMEM;
        if (!arch_validate_prot(prot))
                return -EINVAL;

        reqprot = prot;
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC?
         */
        if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
                prot |= PROT_EXEC;

        vm_flags = calc_vm_prot_bits(prot);

        down_write(&current->mm->mmap_sem);

        vma = find_vma(current->mm, start);
        error = -ENOMEM;
        if (!vma)
                goto out;
        prev = vma->vm_prev;
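        /*
         * PROT_GROWSDOWN and PROT_GROWSUP extend the change to the
         * expandable edge of a stack vma.
         */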
        if (unlikely(grows & PROT_GROWSDOWN)) {
                if (vma->vm_start >= end)
                        goto out;
                start = vma->vm_start;
                error = -EINVAL;
                if (!(vma->vm_flags & VM_GROWSDOWN))
                        goto out;
        } else {
                if (vma->vm_start > start)
                        goto out;
                if (unlikely(grows & PROT_GROWSUP)) {
                        end = vma->vm_end;
                        error = -EINVAL;
                        if (!(vma->vm_flags & VM_GROWSUP))
                                goto out;
                }
        }
        if (start > vma->vm_start)
                prev = vma;

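        /*
         * Walk the vmas covering [start, end) and apply the new
         * protection to each in turn; the vmas must cover the range
         * without holes.
         */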
        for (nstart = start ; ; ) {
                unsigned long newflags;

                /* Here we know that vma->vm_start <= nstart < vma->vm_end. */

                newflags = vm_flags;
                newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

                /* newflags >> 4 shifts VM_MAY% in place of VM_% */
                if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
                        error = -EACCES;
                        goto out;
                }

                error = security_file_mprotect(vma, reqprot, prot);
                if (error)
                        goto out;

                tmp = vma->vm_end;
                if (tmp > end)
                        tmp = end;
                error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
                        goto out;
                nstart = tmp;

                if (nstart < prev->vm_end)
                        nstart = prev->vm_end;
                if (nstart >= end)
                        goto out;

                vma = prev->vm_next;
                if (!vma || vma->vm_start != nstart) {
                        error = -ENOMEM;
                        goto out;
                }
        }
out:
        up_write(&current->mm->mmap_sem);
        return error;
}