2 * Copyright IBM Corp. 2007, 2011
3 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 #include <linux/sched.h>
7 #include <linux/kernel.h>
8 #include <linux/errno.h>
11 #include <linux/swap.h>
12 #include <linux/smp.h>
13 #include <linux/spinlock.h>
14 #include <linux/rcupdate.h>
15 #include <linux/slab.h>
16 #include <linux/swapops.h>
17 #include <linux/sysctl.h>
18 #include <linux/ksm.h>
19 #include <linux/mman.h>
21 #include <asm/pgtable.h>
22 #include <asm/pgalloc.h>
24 #include <asm/tlbflush.h>
25 #include <asm/mmu_context.h>
/*
 * ptep_flush_direct - flush one pte from the TLB immediately.
 *
 * The pte's _PAGE_INVALID bit is tested first; the statement guarded by
 * that test is not visible here (presumably an early return).
 * mm->context.flush_count is incremented before and decremented after
 * the IPTE.  __ptep_ipte_local() is selected when MACHINE_HAS_TLB_LC is
 * set and mm_cpumask(mm) equals the mask of the current CPU.
 *
 * NOTE(review): braces, declarations, the else keyword and the return
 * statements of this function are not visible in this view.
 */
27 static inline pte_t ptep_flush_direct(struct mm_struct *mm,
28 unsigned long addr, pte_t *ptep)
33 if (unlikely(pte_val(old) & _PAGE_INVALID))
/* flush_count stays elevated for the duration of the flush */
35 atomic_inc(&mm->context.flush_count);
36 if (MACHINE_HAS_TLB_LC &&
37 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
38 __ptep_ipte_local(addr, ptep);
/* presumably the else branch: IPTE that is not restricted to this CPU */
40 __ptep_ipte(addr, ptep);
41 atomic_dec(&mm->context.flush_count);
/*
 * ptep_flush_lazy - invalidate one pte, deferring the TLB flush if possible.
 *
 * When mm->context.cpu_attach_mask equals the mask of the current CPU the
 * pte is only marked _PAGE_INVALID in memory and mm->context.flush_mm is
 * set to 1; no IPTE is issued on that path.  The __ptep_ipte() call is
 * presumably the else branch (the else line itself is not visible).
 * mm->context.flush_count is incremented before and decremented after.
 *
 * NOTE(review): braces, declarations and return statements of this
 * function are not visible in this view.
 */
45 static inline pte_t ptep_flush_lazy(struct mm_struct *mm,
46 unsigned long addr, pte_t *ptep)
51 if (unlikely(pte_val(old) & _PAGE_INVALID))
53 atomic_inc(&mm->context.flush_count);
54 if (cpumask_equal(&mm->context.cpu_attach_mask,
55 cpumask_of(smp_processor_id()))) {
/* lazy path: invalidate in memory and record that the mm needs a flush */
56 pte_val(*ptep) |= _PAGE_INVALID;
57 mm->context.flush_mm = 1;
59 __ptep_ipte(addr, ptep);
60 atomic_dec(&mm->context.flush_count);
/*
 * pgste_get_lock - fetch the PGSTE belonging to a pte and set its PCL bit.
 *
 * The PGSTE is addressed as ptep[PTRS_PER_PTE], which is both an output
 * and an input memory operand of the inline assembly.  According to the
 * inline comments the asm clears the PCL bit in the "old" register and
 * sets it in the "new" register.
 *
 * NOTE(review): the load, the compare-and-swap/retry instructions, the
 * declaration of "old" and the return statement are not visible in this
 * view; presumably "new" is what gets returned - confirm.
 */
64 static inline pgste_t pgste_get_lock(pte_t *ptep)
66 unsigned long new = 0;
73 " nihh %0,0xff7f\n" /* clear PCL bit in old */
74 " oihh %1,0x0080\n" /* set PCL bit in new */
/* %0 = old, %1 = new, %2/%3 = the PGSTE word in memory */
77 : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE])
78 : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory");
/*
 * pgste_set_unlock - write a PGSTE value with the PCL bit cleared.
 *
 * The inline assembly clears the PCL bit in the register holding
 * pgste_val(pgste); ptep[PTRS_PER_PTE] is the output memory operand.
 *
 * NOTE(review): the store instruction, the clobber list and the
 * surrounding lines are not visible in this view.
 */
83 static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste)
87 " nihh %1,0xff7f\n" /* clear PCL bit */
/* %0 = the PGSTE word in memory, %1 = the value to write */
89 : "=Q" (ptep[PTRS_PER_PTE])
90 : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE])
/*
 * pgste_get - read the PGSTE of a pte without touching the PCL bit.
 *
 * Reads the unsigned long located PTRS_PER_PTE entries after ptep and
 * returns it wrapped by __pgste().  The local starts out as 0.
 *
 * NOTE(review): lines around the read are not visible in this view;
 * the read may be conditionally compiled - confirm.
 */
95 static inline pgste_t pgste_get(pte_t *ptep)
97 unsigned long pgste = 0;
99 pgste = *(unsigned long *)(ptep + PTRS_PER_PTE);
101 return __pgste(pgste);
/*
 * pgste_set - store a PGSTE value for a pte with a plain assignment.
 *
 * The value is written PTRS_PER_PTE entries after ptep.  No bit of the
 * value is modified here.
 *
 * NOTE(review): braces and any surrounding lines are not visible here.
 */
104 static inline void pgste_set(pte_t *ptep, pgste_t pgste)
107 *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste;
/*
 * pgste_update_all - fold the storage key of a page into its PGSTE.
 *
 * Nothing is transferred when !mm_use_skey(mm) or the pte is invalid
 * (the statement guarded by that test is not visible here).  Otherwise
 * the storage key of the page addressed by the pte is read; its
 * changed/referenced bits are OR-ed into the PGSTE shifted left by 48,
 * and its access-key and fetch-protection bits replace
 * PGSTE_ACC_BITS/PGSTE_FP_BIT shifted left by 56.
 *
 * NOTE(review): braces and the return statements are not visible in
 * this view.
 */
111 static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
112 struct mm_struct *mm)
115 unsigned long address, bits, skey;
117 if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
119 address = pte_val(pte) & PAGE_MASK;
120 skey = (unsigned long) page_get_storage_key(address);
121 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
122 /* Transfer page changed & referenced bit to guest bits in pgste */
123 pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */
124 /* Copy page access key and fetch protection bit to pgste */
125 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT);
126 pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
/*
 * pgste_set_key - program a page's storage key from the PGSTE contents.
 *
 * Skipped when !mm_use_skey(mm) or the new entry is invalid (the guarded
 * statement is not visible here).  The pte currently in memory must be
 * invalid (VM_BUG_ON otherwise).  The key is assembled from the PGSTE
 * access-key/fetch-protection bits (shifted right by 56) and the PGSTE
 * GR/GC bits (shifted right by 48), then written to the page addressed
 * by "entry" via page_set_storage_key() with 0 as last argument.
 *
 * NOTE(review): the declaration of nkey, braces and part of the inner
 * comment are not visible in this view.
 */
132 static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
133 struct mm_struct *mm)
136 unsigned long address;
139 if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
141 VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
142 address = pte_val(entry) & PAGE_MASK;
144 * Set page access key and fetch protection bit from pgste.
145 * The guest C/R information is still in the PGSTE, set real
148 nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
149 nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
150 page_set_storage_key(address, nkey, 0);
/*
 * pgste_set_pte - adjust a new pte and the PGSTE user-dirty bit.
 *
 * Only entries that are present, writable and valid are treated
 * specially: without MACHINE_HAS_ESOP the entry gets _PAGE_DIRTY set and
 * _PAGE_PROTECT cleared, and whenever _PAGE_PROTECT is clear afterwards
 * PGSTE_UC_BIT is set in the PGSTE.
 *
 * NOTE(review): the store of "entry" into *ptep and the return statement
 * are not visible in this view; callers use the return value as the
 * updated PGSTE.
 */
154 static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
157 if ((pte_val(entry) & _PAGE_PRESENT) &&
158 (pte_val(entry) & _PAGE_WRITE) &&
159 !(pte_val(entry) & _PAGE_INVALID)) {
160 if (!MACHINE_HAS_ESOP) {
162 * Without enhanced suppression-on-protection force
163 * the dirty bit on for all writable ptes.
165 pte_val(entry) |= _PAGE_DIRTY;
166 pte_val(entry) &= ~_PAGE_PROTECT;
168 if (!(pte_val(entry) & _PAGE_PROTECT))
169 /* This pte allows write access, set user-dirty */
170 pgste_val(pgste) |= PGSTE_UC_BIT;
/*
 * pgste_ipte_notify - deliver a pending invalidation notification.
 *
 * If PGSTE_IN_BIT is set in the PGSTE, the bit is cleared and
 * ptep_notify(mm, addr, ptep) is called.
 *
 * NOTE(review): the addr parameter line, braces and the return statement
 * are not visible in this view; callers assign the result back to their
 * pgste variable.
 */
177 static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
179 pte_t *ptep, pgste_t pgste)
182 if (pgste_val(pgste) & PGSTE_IN_BIT) {
/* the notification bit is one-shot: clear it before notifying */
183 pgste_val(pgste) &= ~PGSTE_IN_BIT;
184 ptep_notify(mm, addr, ptep);
/*
 * ptep_xchg_start - first step of a pte exchange.
 *
 * For an mm with PGSTEs the PGSTE is fetched and locked through
 * pgste_get_lock() and a pending notification is handled through
 * pgste_ipte_notify().  Otherwise the local PGSTE stays __pgste(0).
 *
 * NOTE(review): braces and the return statement are not visible in this
 * view; callers use the result as the PGSTE for the commit step.
 */
190 static inline pgste_t ptep_xchg_start(struct mm_struct *mm,
191 unsigned long addr, pte_t *ptep)
193 pgste_t pgste = __pgste(0);
195 if (mm_has_pgste(mm)) {
196 pgste = pgste_get_lock(ptep);
197 pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
/*
 * ptep_xchg_commit - final step of a pte exchange.
 *
 * For an mm with PGSTEs:
 *  - if the old pte was invalid, the storage key for the new entry is
 *    programmed from the PGSTE (pgste_set_key);
 *  - if the new pte is invalid, the old page's storage key is folded into
 *    the PGSTE (pgste_update_all) and, when the PGSTE usage state equals
 *    _PGSTE_GPS_USAGE_UNUSED, _PAGE_UNUSED is set in the local "old";
 *  - finally pgste_set_pte() is applied and the PGSTE is written back
 *    with pgste_set_unlock().
 *
 * NOTE(review): the path for an mm without PGSTEs, braces and any return
 * are not visible in this view.
 */
202 static inline void ptep_xchg_commit(struct mm_struct *mm,
203 unsigned long addr, pte_t *ptep,
204 pgste_t pgste, pte_t old, pte_t new)
206 if (mm_has_pgste(mm)) {
207 if (pte_val(old) & _PAGE_INVALID)
208 pgste_set_key(ptep, pgste, new, mm);
209 if (pte_val(new) & _PAGE_INVALID) {
210 pgste = pgste_update_all(old, pgste, mm);
211 if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
212 _PGSTE_GPS_USAGE_UNUSED)
213 pte_val(old) |= _PAGE_UNUSED;
215 pgste = pgste_set_pte(ptep, pgste, new);
216 pgste_set_unlock(ptep, pgste);
/*
 * ptep_xchg_direct - exchange a pte using the immediate flush.
 *
 * Runs the three steps in order: ptep_xchg_start(), ptep_flush_direct()
 * and ptep_xchg_commit() with the new entry.  Exported via EXPORT_SYMBOL.
 *
 * NOTE(review): local declarations, any preemption handling and the
 * return statement are not visible in this view.
 */
222 pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr,
223 pte_t *ptep, pte_t new)
229 pgste = ptep_xchg_start(mm, addr, ptep);
230 old = ptep_flush_direct(mm, addr, ptep);
231 ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
235 EXPORT_SYMBOL(ptep_xchg_direct);
/*
 * ptep_xchg_lazy - exchange a pte using the lazy flush.
 *
 * Same sequence as the direct variant but with ptep_flush_lazy() as the
 * flush step.  Exported via EXPORT_SYMBOL.
 *
 * NOTE(review): local declarations, any preemption handling and the
 * return statement are not visible in this view.
 */
237 pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr,
238 pte_t *ptep, pte_t new)
244 pgste = ptep_xchg_start(mm, addr, ptep);
245 old = ptep_flush_lazy(mm, addr, ptep);
246 ptep_xchg_commit(mm, addr, ptep, pgste, old, new);
250 EXPORT_SYMBOL(ptep_xchg_lazy);
/*
 * ptep_modify_prot_start - begin a protection change on a pte.
 *
 * Performs ptep_xchg_start() and a lazy flush.  For an mm with PGSTEs the
 * old page's storage key is folded into the PGSTE and the result is
 * stored with pgste_set(), i.e. without pgste_set_unlock(); the PGSTE
 * lock taken in ptep_xchg_start() does not appear to be released in the
 * visible lines (ptep_modify_prot_commit() ends with pgste_set_unlock()).
 *
 * NOTE(review): the remaining parameter line, declarations and the return
 * statement are not visible in this view.
 */
252 pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
259 pgste = ptep_xchg_start(mm, addr, ptep);
260 old = ptep_flush_lazy(mm, addr, ptep);
261 if (mm_has_pgste(mm)) {
262 pgste = pgste_update_all(old, pgste, mm);
263 pgste_set(ptep, pgste);
267 EXPORT_SYMBOL(ptep_modify_prot_start);
/*
 * ptep_modify_prot_commit - finish a protection change on a pte.
 *
 * For an mm with PGSTEs the PGSTE is re-read with pgste_get() (no locking
 * step here), the storage key for the new pte is programmed, the pte is
 * processed by pgste_set_pte() and the PGSTE is written back with
 * pgste_set_unlock().
 *
 * NOTE(review): the path for an mm without PGSTEs, declarations and any
 * preemption handling are not visible in this view.
 */
269 void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
270 pte_t *ptep, pte_t pte)
274 if (mm_has_pgste(mm)) {
275 pgste = pgste_get(ptep);
276 pgste_set_key(ptep, pgste, pte, mm);
277 pgste = pgste_set_pte(ptep, pgste, pte);
278 pgste_set_unlock(ptep, pgste);
284 EXPORT_SYMBOL(ptep_modify_prot_commit);
/*
 * pmdp_flush_direct - flush one pmd (segment table entry) immediately.
 *
 * An entry with _SEGMENT_ENTRY_INVALID set is tested first; the statement
 * guarded by that test is not visible here.  Machines without
 * MACHINE_HAS_IDTE take a separate path whose body is not visible.
 * Otherwise mm->context.flush_count is raised around the IDTE, and
 * __pmdp_idte_local() is selected when MACHINE_HAS_TLB_LC is set and
 * mm_cpumask(mm) equals the mask of the current CPU.
 *
 * NOTE(review): braces, declarations, the else keyword and return
 * statements are not visible in this view.
 */
286 static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
287 unsigned long addr, pmd_t *pmdp)
292 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
294 if (!MACHINE_HAS_IDTE) {
298 atomic_inc(&mm->context.flush_count);
299 if (MACHINE_HAS_TLB_LC &&
300 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
301 __pmdp_idte_local(addr, pmdp);
/* presumably the else branch: IDTE that is not restricted to this CPU */
303 __pmdp_idte(addr, pmdp);
304 atomic_dec(&mm->context.flush_count);
/*
 * pmdp_flush_lazy - invalidate one pmd, deferring the TLB flush if possible.
 *
 * When mm->context.cpu_attach_mask equals the mask of the current CPU the
 * entry is only marked _SEGMENT_ENTRY_INVALID in memory and
 * mm->context.flush_mm is set to 1.  Otherwise __pmdp_idte() is used when
 * MACHINE_HAS_IDTE is set; the fallback for machines without IDTE is not
 * visible here.  mm->context.flush_count is raised around the operation.
 *
 * NOTE(review): braces, declarations, the final else branch and return
 * statements are not visible in this view.
 */
308 static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
309 unsigned long addr, pmd_t *pmdp)
314 if (pmd_val(old) & _SEGMENT_ENTRY_INVALID)
316 atomic_inc(&mm->context.flush_count);
317 if (cpumask_equal(&mm->context.cpu_attach_mask,
318 cpumask_of(smp_processor_id()))) {
/* lazy path: invalidate in memory and record that the mm needs a flush */
319 pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
320 mm->context.flush_mm = 1;
321 } else if (MACHINE_HAS_IDTE)
322 __pmdp_idte(addr, pmdp);
325 atomic_dec(&mm->context.flush_count);
/*
 * pmdp_xchg_direct - exchange a pmd using the immediate flush.
 *
 * The old entry is obtained from pmdp_flush_direct().  Exported via
 * EXPORT_SYMBOL.
 *
 * NOTE(review): the store of "new" into *pmdp, declarations, any
 * preemption handling and the return statement are not visible in this
 * view.
 */
329 pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
330 pmd_t *pmdp, pmd_t new)
335 old = pmdp_flush_direct(mm, addr, pmdp);
340 EXPORT_SYMBOL(pmdp_xchg_direct);
/*
 * pmdp_xchg_lazy - exchange a pmd using the lazy flush.
 *
 * The old entry is obtained from pmdp_flush_lazy().  Exported via
 * EXPORT_SYMBOL.
 *
 * NOTE(review): the store of "new" into *pmdp, declarations, any
 * preemption handling and the return statement are not visible in this
 * view.
 */
342 pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
343 pmd_t *pmdp, pmd_t new)
348 old = pmdp_flush_lazy(mm, addr, pmdp);
353 EXPORT_SYMBOL(pmdp_xchg_lazy);
/*
 * pudp_flush_direct - flush one pud (region table entry) immediately.
 *
 * An entry with _REGION_ENTRY_INVALID set is tested first; the statement
 * guarded by that test is not visible here.  Machines without
 * MACHINE_HAS_IDTE fall back to __pmdp_csp() on the pud pointer cast to
 * pmd_t *.  Otherwise mm->context.flush_count is raised around the IDTE,
 * and __pudp_idte_local() is selected when MACHINE_HAS_TLB_LC is set and
 * mm_cpumask(mm) equals the mask of the current CPU.
 *
 * NOTE(review): braces, declarations, the else keyword and return
 * statements are not visible in this view.
 */
355 static inline pud_t pudp_flush_direct(struct mm_struct *mm,
356 unsigned long addr, pud_t *pudp)
361 if (pud_val(old) & _REGION_ENTRY_INVALID)
363 if (!MACHINE_HAS_IDTE) {
/* NOTE(review): the comment text says _pmd_csp(); the call is __pmdp_csp() */
365 * Invalid bit position is the same for pmd and pud, so we can
366 * re-use _pmd_csp() here
368 __pmdp_csp((pmd_t *) pudp);
371 atomic_inc(&mm->context.flush_count);
372 if (MACHINE_HAS_TLB_LC &&
373 cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
374 __pudp_idte_local(addr, pudp);
/* presumably the else branch: IDTE that is not restricted to this CPU */
376 __pudp_idte(addr, pudp);
377 atomic_dec(&mm->context.flush_count);
/*
 * pudp_xchg_direct - exchange a pud using the immediate flush.
 *
 * The old entry is obtained from pudp_flush_direct().  Exported via
 * EXPORT_SYMBOL.
 *
 * NOTE(review): the store of "new" into *pudp, declarations, any
 * preemption handling and the return statement are not visible in this
 * view.
 */
381 pud_t pudp_xchg_direct(struct mm_struct *mm, unsigned long addr,
382 pud_t *pudp, pud_t new)
387 old = pudp_flush_direct(mm, addr, pudp);
392 EXPORT_SYMBOL(pudp_xchg_direct);
394 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * pgtable_trans_huge_deposit - stash a page table for a huge pmd.
 *
 * The deposited page table itself is used as the list node: its address
 * is cast to struct list_head *.  The caller must hold pmd_lockptr(mm,
 * pmdp) (asserted).  When a table is already deposited the new one is
 * linked to it with list_add(); in all visible paths pmd_huge_pte(mm,
 * pmdp) ends up pointing at the newly deposited table.
 *
 * NOTE(review): the remaining parameter line and the statement for the
 * empty case (no table deposited yet) are not visible in this view.
 */
395 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
398 struct list_head *lh = (struct list_head *) pgtable;
400 assert_spin_locked(pmd_lockptr(mm, pmdp));
403 if (!pmd_huge_pte(mm, pmdp))
406 list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
407 pmd_huge_pte(mm, pmdp) = pgtable;
/*
 * pgtable_trans_huge_withdraw - take back a deposited page table.
 *
 * The caller must hold pmd_lockptr(mm, pmdp) (asserted).  The table
 * currently referenced by pmd_huge_pte(mm, pmdp) is taken; the slot is
 * then set either to NULL or to the next list element (the condition
 * choosing between the two is not visible here).  Because the list node
 * lived inside the table, pte entries are reset to _PAGE_INVALID
 * afterwards; two such stores are visible.
 *
 * NOTE(review): the list handling between the visible lines, the pointer
 * advance between the two stores and the return statement are not
 * visible in this view.
 */
410 pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
412 struct list_head *lh;
416 assert_spin_locked(pmd_lockptr(mm, pmdp));
419 pgtable = pmd_huge_pte(mm, pmdp);
420 lh = (struct list_head *) pgtable;
422 pmd_huge_pte(mm, pmdp) = NULL;
424 pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
/* wipe the pte slots that were overlaid by the list node */
427 ptep = (pte_t *) pgtable;
428 pte_val(*ptep) = _PAGE_INVALID;
430 pte_val(*ptep) = _PAGE_INVALID;
433 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
 * ptep_set_pte_at - install a pte for an mm that uses PGSTEs.
 *
 * With the PGSTE locked, _PGSTE_GPS_ZERO is cleared, the storage key for
 * the new entry is programmed from the PGSTE, the entry is processed by
 * pgste_set_pte() and the PGSTE is written back unlocked.
 *
 * NOTE(review): declarations and any preemption handling are not visible
 * in this view.
 */
436 void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr,
437 pte_t *ptep, pte_t entry)
441 /* the mm_has_pgste() check is done in set_pte_at() */
443 pgste = pgste_get_lock(ptep);
444 pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
445 pgste_set_key(ptep, pgste, entry, mm);
446 pgste = pgste_set_pte(ptep, pgste, entry);
447 pgste_set_unlock(ptep, pgste);
/*
 * ptep_set_notify - arm the invalidation notification for a pte.
 *
 * Sets PGSTE_IN_BIT in the pte's PGSTE under the PGSTE lock.
 *
 * NOTE(review): declarations and any preemption handling are not visible
 * in this view.
 */
451 void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
456 pgste = pgste_get_lock(ptep);
457 pgste_val(pgste) |= PGSTE_IN_BIT;
458 pgste_set_unlock(ptep, pgste);
/*
 * ptep_zap_swap_entry - release the resources behind a swap-type pte.
 *
 * For a real swap entry the MM_SWAPENTS counter of the mm is decremented;
 * for a migration entry the counter selected by mm_counter() for the
 * page under migration is decremented.  free_swap_and_cache(entry) is
 * the last visible statement.
 *
 * NOTE(review): braces are not visible in this view.
 */
462 static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
464 if (!non_swap_entry(entry))
465 dec_mm_counter(mm, MM_SWAPENTS);
466 else if (is_migration_entry(entry)) {
467 struct page *page = migration_entry_to_page(entry);
469 dec_mm_counter(mm, mm_counter(page));
471 free_swap_and_cache(entry);
/*
 * ptep_zap_unused - drop swap ptes the guest no longer needs.
 *
 * With the PGSTE locked: when "reset" is zero and the pte is a swap pte
 * whose PGSTE usage state is _PGSTE_GPS_USAGE_UNUSED or whose
 * _PGSTE_GPS_ZERO bit is set, the swap entry is released and the pte is
 * cleared.  The usage-state bits are cleared from the PGSTE before it is
 * written back; the condition guarding that clearing is not visible here
 * (presumably "reset" - confirm).
 *
 * NOTE(review): declarations, the read of the pte into "pte" and any
 * preemption handling are not visible in this view.
 */
474 void ptep_zap_unused(struct mm_struct *mm, unsigned long addr,
475 pte_t *ptep, int reset)
477 unsigned long pgstev;
481 /* Zap unused and logically-zero pages */
483 pgste = pgste_get_lock(ptep);
484 pgstev = pgste_val(pgste);
486 if (!reset && pte_swap(pte) &&
487 ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED ||
488 (pgstev & _PGSTE_GPS_ZERO))) {
489 ptep_zap_swap_entry(mm, pte_to_swp_entry(pte));
490 pte_clear(mm, addr, ptep);
493 pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
494 pgste_set_unlock(ptep, pgste);
/*
 * ptep_zap_key - reset the guest storage key state of a pte.
 *
 * With the PGSTE locked, the access-key, fetch-protection, GR and GC bits
 * are cleared from the PGSTE.  If the pte is valid and has _PAGE_WRITE
 * set, the page's real storage key is set to PAGE_DEFAULT_KEY with 1 as
 * last argument to page_set_storage_key().
 *
 * NOTE(review): declarations and any preemption handling are not visible
 * in this view.
 */
498 void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
503 /* Clear storage key */
505 pgste = pgste_get_lock(ptep);
506 pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
507 PGSTE_GR_BIT | PGSTE_GC_BIT);
508 ptev = pte_val(*ptep);
509 if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE))
510 page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1);
511 pgste_set_unlock(ptep, pgste);
516 * Test and reset if a guest page is dirty
/*
 * test_and_clear_guest_dirty - read and reset the user-dirty state of the
 * page mapped at addr.
 *
 * The pte is looked up and locked with get_locked_pte(); the dirty state
 * is taken from PGSTE_UC_BIT, which is then cleared.  If the page was
 * dirty and the pte is present, a pending notification is delivered, the
 * pte is flushed with __ptep_ipte() and then either write-protected (with
 * MACHINE_HAS_ESOP, or when the pte lacks _PAGE_WRITE) or marked invalid
 * (presumably the else branch).  Exported via EXPORT_SYMBOL_GPL.
 *
 * NOTE(review): declarations, the handling of a failed lookup, the read
 * of the pte into "pte", the store back to *ptep, the pte unlock and the
 * return statement are not visible in this view.
 */
518 bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
526 ptep = get_locked_pte(mm, addr, &ptl);
530 pgste = pgste_get_lock(ptep);
531 dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
532 pgste_val(pgste) &= ~PGSTE_UC_BIT;
534 if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
535 pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
536 __ptep_ipte(addr, ptep);
537 if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
538 pte_val(pte) |= _PAGE_PROTECT;
540 pte_val(pte) |= _PAGE_INVALID;
543 pgste_set_unlock(ptep, pgste);
548 EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
/*
 * set_guest_storage_key - set the guest-visible storage key of a page.
 * @mm:   address space containing the page
 * @addr: address whose pte is looked up with get_locked_pte()
 * @key:  new storage key byte
 * @nq:   the inverse (!nq) is passed as last argument to
 *        page_set_storage_key()
 *
 * Runs under mm->mmap_sem (read) and the pte lock.  The GR/GC/ACC/FP bits
 * of the PGSTE are replaced by the corresponding bits of @key.  If the
 * pte is valid, the real storage key gets the ACC/FP bits of @key, and
 * the real changed/referenced bits read beforehand are merged into the
 * PGSTE shifted left by 52.  Any difference in the ACC/FP/GR/GC bits
 * between the old and new PGSTE sets PGSTE_UC_BIT.  Exported via
 * EXPORT_SYMBOL.
 *
 * NOTE(review): declarations and the return statements (including the
 * value returned when the pte lookup fails) are not visible in this view.
 */
550 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
551 unsigned char key, bool nq)
558 down_read(&mm->mmap_sem);
559 ptep = get_locked_pte(mm, addr, &ptl);
560 if (unlikely(!ptep)) {
561 up_read(&mm->mmap_sem);
565 new = old = pgste_get_lock(ptep);
566 pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
567 PGSTE_ACC_BITS | PGSTE_FP_BIT);
568 keyul = (unsigned long) key;
569 pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
570 pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
571 if (!(pte_val(*ptep) & _PAGE_INVALID)) {
572 unsigned long address, bits, skey;
574 address = pte_val(*ptep) & PAGE_MASK;
/* sample the real changed/referenced bits before the key is rewritten */
575 skey = (unsigned long) page_get_storage_key(address);
576 bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
577 skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
578 /* Set storage key ACC and FP */
579 page_set_storage_key(address, skey, !nq);
580 /* Merge host changed & referenced into pgste */
581 pgste_val(new) |= bits << 52;
583 /* changing the guest storage key is considered a change of the page */
584 if ((pgste_val(new) ^ pgste_val(old)) &
585 (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
586 pgste_val(new) |= PGSTE_UC_BIT;
588 pgste_set_unlock(ptep, new);
589 pte_unmap_unlock(ptep, ptl);
590 up_read(&mm->mmap_sem);
593 EXPORT_SYMBOL(set_guest_storage_key);
/*
 * get_guest_storage_key - read the guest-visible storage key of a page.
 * @mm:   address space containing the page
 * @addr: address whose pte is looked up with get_locked_pte()
 *
 * Runs under mm->mmap_sem (read), the pte lock and the PGSTE lock.  For
 * an invalid pte the key is assembled purely from the PGSTE: ACC and FP
 * bits shifted right by 56, GR and GC bits shifted right by 48.  The
 * other visible path (presumably the else branch) reads the real storage
 * key and additionally reports _PAGE_REFERENCED/_PAGE_CHANGED when the
 * PGSTE GR/GC bits are set.  The PGSTE is written back unchanged.
 * Exported via EXPORT_SYMBOL.
 *
 * NOTE(review): declarations and the return statements (including the
 * value returned when the pte lookup fails) are not visible in this
 * view; the return type is unsigned char, so confirm that the failure
 * value fits it.
 */
595 unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
602 down_read(&mm->mmap_sem);
603 ptep = get_locked_pte(mm, addr, &ptl);
604 if (unlikely(!ptep)) {
605 up_read(&mm->mmap_sem);
608 pgste = pgste_get_lock(ptep);
610 if (pte_val(*ptep) & _PAGE_INVALID) {
611 key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
612 key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
613 key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
614 key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
616 key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
618 /* Reflect guest's logical view, not physical */
619 if (pgste_val(pgste) & PGSTE_GR_BIT)
620 key |= _PAGE_REFERENCED;
621 if (pgste_val(pgste) & PGSTE_GC_BIT)
622 key |= _PAGE_CHANGED;
625 pgste_set_unlock(ptep, pgste);
626 pte_unmap_unlock(ptep, ptl);
627 up_read(&mm->mmap_sem);
630 EXPORT_SYMBOL(get_guest_storage_key);