x86: mm: consolidate VM_FAULT_RETRY handling
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 15 Dec 2014 23:07:33 +0000 (15:07 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 15 Dec 2014 23:07:33 +0000 (15:07 -0800)
The VM_FAULT_RETRY handling was confusing and incorrect for the case of
returning to kernel mode.  We need to handle the exception table fixup
if we return to kernel mode due to a fatal signal - it will basically
look to the kernel user mode access like the access failed due to the VM
going away from udner it.  Which is correct - the process is dying - and
avoids the whole "repeat endless kernel page faults" case.

Handling the VM_FAULT_RETRY early and in just one place also simplifies
the mmap_sem handling, since once we've taken care of VM_FAULT_RETRY we
know that we can just drop the lock.  The remaining accounting and
possible error handling is thread-local and does not need the mmap_sem.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/x86/mm/fault.c

index d7de5b8..b74a7e1 100644 (file)
@@ -1055,7 +1055,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
        struct vm_area_struct *vma;
        struct task_struct *tsk;
        struct mm_struct *mm;
-       int fault;
+       int fault, major = 0;
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        tsk = current;
@@ -1230,48 +1230,50 @@ good_area:
         * we get VM_FAULT_RETRY back, the mmap_sem has been unlocked.
         */
        fault = handle_mm_fault(mm, vma, address, flags);
+       major |= fault & VM_FAULT_MAJOR;
 
        /*
-        * If we need to retry but a fatal signal is pending, handle the
-        * signal first. We do not need to release the mmap_sem because it
-        * would already be released in __lock_page_or_retry in mm/filemap.c.
+        * If we need to retry the mmap_sem has already been released,
+        * and if there is a fatal signal pending there is no guarantee
+        * that we made any progress. Handle this case first.
         */
-       if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)))
+       if (unlikely(fault & VM_FAULT_RETRY)) {
+               /* Retry at most once */
+               if (flags & FAULT_FLAG_ALLOW_RETRY) {
+                       flags &= ~FAULT_FLAG_ALLOW_RETRY;
+                       flags |= FAULT_FLAG_TRIED;
+                       if (!fatal_signal_pending(tsk))
+                               goto retry;
+               }
+
+               /* User mode? Just return to handle the fatal exception */
+               if (fault & FAULT_FLAG_USER)
+                       return;
+
+               /* Not returning to user mode? Handle exceptions or die: */
+               no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
                return;
+       }
 
+       up_read(&mm->mmap_sem);
        if (unlikely(fault & VM_FAULT_ERROR)) {
-               up_read(&mm->mmap_sem);
                mm_fault_error(regs, error_code, address, fault);
                return;
        }
 
        /*
-        * Major/minor page fault accounting is only done on the
-        * initial attempt. If we go through a retry, it is extremely
-        * likely that the page will be found in page cache at that point.
+        * Major/minor page fault accounting. If any of the events
+        * returned VM_FAULT_MAJOR, we account it as a major fault.
         */
-       if (flags & FAULT_FLAG_ALLOW_RETRY) {
-               if (fault & VM_FAULT_MAJOR) {
-                       tsk->maj_flt++;
-                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-                                     regs, address);
-               } else {
-                       tsk->min_flt++;
-                       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-                                     regs, address);
-               }
-               if (fault & VM_FAULT_RETRY) {
-                       /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
-                        * of starvation. */
-                       flags &= ~FAULT_FLAG_ALLOW_RETRY;
-                       flags |= FAULT_FLAG_TRIED;
-                       goto retry;
-               }
+       if (major) {
+               tsk->maj_flt++;
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+       } else {
+               tsk->min_flt++;
+               perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
        }
 
        check_v8086_mode(regs, address, tsk);
-
-       up_read(&mm->mmap_sem);
 }
 NOKPROBE_SYMBOL(__do_page_fault);