thp, mlock: do not mlock PTE-mapped file huge pages
author Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tue, 26 Jul 2016 22:25:53 +0000 (15:25 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 26 Jul 2016 23:19:19 +0000 (16:19 -0700)
As with anon THP, we only mlock file huge pages if we can prove that the
page is not mapped with PTEs.  This way we can avoid leaking mlock into
a non-mlocked vma on split.

We rely on PageDoubleMap() under lock_page() to check if the page
may be PTE mapped.  PG_double_map is set by page_add_file_rmap() when
the page is mapped with PTEs.

Link: http://lkml.kernel.org/r/1466021202-61880-21-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/page-flags.h
mm/huge_memory.c
mm/mmap.c
mm/page_alloc.c
mm/rmap.c

index 7c8e82a..8cf0963 100644 (file)
@@ -581,6 +581,17 @@ static inline int PageDoubleMap(struct page *page)
        return PageHead(page) && test_bit(PG_double_map, &page[1].flags);
 }
 
+static inline void SetPageDoubleMap(struct page *page)
+{
+       VM_BUG_ON_PAGE(!PageHead(page), page);
+       set_bit(PG_double_map, &page[1].flags);
+}
+
+static inline void ClearPageDoubleMap(struct page *page)
+{
+       VM_BUG_ON_PAGE(!PageHead(page), page);
+       clear_bit(PG_double_map, &page[1].flags);
+}
 static inline int TestSetPageDoubleMap(struct page *page)
 {
        VM_BUG_ON_PAGE(!PageHead(page), page);
@@ -598,7 +609,7 @@ TESTPAGEFLAG_FALSE(TransHuge)
 TESTPAGEFLAG_FALSE(TransCompound)
 TESTPAGEFLAG_FALSE(TransCompoundMap)
 TESTPAGEFLAG_FALSE(TransTail)
-TESTPAGEFLAG_FALSE(DoubleMap)
+PAGEFLAG_FALSE(DoubleMap)
        TESTSETFLAG_FALSE(DoubleMap)
        TESTCLEARFLAG_FALSE(DoubleMap)
 #endif
index 4860777..3b74fea 100644 (file)
@@ -1437,6 +1437,8 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                 * We don't mlock() pte-mapped THPs. This way we can avoid
                 * leaking mlocked pages into non-VM_LOCKED VMAs.
                 *
+                * For anon THP:
+                *
                 * In most cases the pmd is the only mapping of the page as we
                 * break COW for the mlock() -- see gup_flags |= FOLL_WRITE for
                 * writable private mappings in populate_vma_page_range().
@@ -1444,15 +1446,26 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                 * The only scenario when we have the page shared here is if we
                 * mlocking read-only mapping shared over fork(). We skip
                 * mlocking such pages.
+                *
+                * For file THP:
+                *
+                * We can expect PageDoubleMap() to be stable under page lock:
+                * for file pages we set it in page_add_file_rmap(), which
+                * requires page to be locked.
                 */
-               if (compound_mapcount(page) == 1 && !PageDoubleMap(page) &&
-                               page->mapping && trylock_page(page)) {
-                       lru_add_drain();
-                       if (page->mapping)
-                               mlock_vma_page(page);
-                       unlock_page(page);
-               }
+
+               if (PageAnon(page) && compound_mapcount(page) != 1)
+                       goto skip_mlock;
+               if (PageDoubleMap(page) || !page->mapping)
+                       goto skip_mlock;
+               if (!trylock_page(page))
+                       goto skip_mlock;
+               lru_add_drain();
+               if (page->mapping && !PageDoubleMap(page))
+                       mlock_vma_page(page);
+               unlock_page(page);
        }
+skip_mlock:
        page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
        VM_BUG_ON_PAGE(!PageCompound(page), page);
        if (flags & FOLL_GET)
index 31f9b22..a41872c 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2591,6 +2591,12 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
                /* drop PG_Mlocked flag for over-mapped range */
                for (tmp = vma; tmp->vm_start >= start + size;
                                tmp = tmp->vm_next) {
+                       /*
+                        * Split pmd and munlock page on the border
+                        * of the range.
+                        */
+                       vma_adjust_trans_huge(tmp, start, start + size, 0);
+
                        munlock_vma_pages_range(tmp,
                                        max(tmp->vm_start, start),
                                        min(tmp->vm_end, start + size));
index 7023a31..847281e 100644 (file)
@@ -1007,6 +1007,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
 
                VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
 
+               if (compound)
+                       ClearPageDoubleMap(page);
                for (i = 1; i < (1 << order); i++) {
                        if (compound)
                                bad += free_tail_pages_check(page, page + i);
index 2b336c4..9d643b7 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1284,6 +1284,12 @@ void page_add_file_rmap(struct page *page, bool compound)
                if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
                        goto out;
        } else {
+               if (PageTransCompound(page)) {
+                       VM_BUG_ON_PAGE(!PageLocked(page), page);
+                       SetPageDoubleMap(compound_head(page));
+                       if (PageMlocked(page))
+                               clear_page_mlock(compound_head(page));
+               }
                if (!atomic_inc_and_test(&page->_mapcount))
                        goto out;
        }
@@ -1458,8 +1464,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
         */
        if (!(flags & TTU_IGNORE_MLOCK)) {
                if (vma->vm_flags & VM_LOCKED) {
-                       /* Holding pte lock, we do *not* need mmap_sem here */
-                       mlock_vma_page(page);
+                       /* PTE-mapped THP are never mlocked */
+                       if (!PageTransCompound(page)) {
+                               /*
+                                * Holding pte lock, we do *not* need
+                                * mmap_sem here
+                                */
+                               mlock_vma_page(page);
+                       }
                        ret = SWAP_MLOCK;
                        goto out_unmap;
                }