futex: Fault/error injection capabilities
author Davidlohr Bueso <dave@stgolabs.net>
Tue, 30 Jun 2015 06:26:02 +0000 (23:26 -0700)
committer Thomas Gleixner <tglx@linutronix.de>
Mon, 20 Jul 2015 09:45:45 +0000 (11:45 +0200)
Although futexes are well known for being a royal pita,
we really have very few debugging capabilities - except
for relying on tglx's eye half the time.

By simply making use of the existing fault-injection machinery,
we can improve this situation, allowing generating artificial
uaddress faults and deadlock scenarios. Of course, when this is
disabled in production systems, the overhead for failure checks
is practically zero -- so this is very cheap at the same time.
As future work, it would be nice to enhance trinity to make use of
this.

There is a special tunable 'ignore-private', which can filter
out private futexes. Given the tsk->make_it_fail filter and
this option, pi futexes can be narrowed down pretty closely.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Darren Hart <darren@dvhart.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Link: http://lkml.kernel.org/r/1435645562-975-3-git-send-email-dave@stgolabs.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Documentation/fault-injection/fault-injection.txt
kernel/futex.c
lib/Kconfig.debug

index 4cf1a2a..415484f 100644 (file)
@@ -15,6 +15,10 @@ o fail_page_alloc
 
   injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
 
+o fail_futex
+
+  injects futex deadlock and uaddr fault errors.
+
 o fail_make_request
 
   injects disk IO errors on devices permitted by setting
@@ -113,6 +117,12 @@ configuration of fault-injection capabilities.
        specifies the minimum page allocation order to be injected
        failures.
 
+- /sys/kernel/debug/fail_futex/ignore-private:
+
+       Format: { 'Y' | 'N' }
+       default is 'N', setting it to 'Y' will disable failure injections
+       when dealing with private (address space) futexes.
+
 o Boot option
 
 In order to inject faults while debugfs is not available (early boot time),
@@ -121,6 +131,7 @@ use the boot option:
        failslab=
        fail_page_alloc=
        fail_make_request=
+       fail_futex=
        mmc_core.fail_request=<interval>,<probability>,<space>,<times>
 
 How to add new fault injection capability
index 153eb22..6ea31bb 100644 (file)
@@ -64,6 +64,7 @@
 #include <linux/hugetlb.h>
 #include <linux/freezer.h>
 #include <linux/bootmem.h>
+#include <linux/fault-inject.h>
 
 #include <asm/futex.h>
 
@@ -258,6 +259,66 @@ static unsigned long __read_mostly futex_hashsize;
 
 static struct futex_hash_bucket *futex_queues;
 
+/*
+ * Fault injections for futexes.
+ */
+#ifdef CONFIG_FAIL_FUTEX
+
+static struct {
+       struct fault_attr attr;         /* fault-injection attributes handed to should_fail() */
+
+       u32 ignore_private;             /* non-zero: never inject for non-shared futexes */
+} fail_futex = {
+       .attr = FAULT_ATTR_INITIALIZER,
+       .ignore_private = 0,            /* default: private futexes are not filtered out */
+};
+
+/*
+ * Handle the "fail_futex=" boot parameter: the option string is passed,
+ * together with fail_futex.attr, to setup_fault_attr() and that helper's
+ * return value is handed back unchanged.
+ */
+static int __init setup_fail_futex(char *str)
+{
+       struct fault_attr *attr = &fail_futex.attr;
+
+       return setup_fault_attr(attr, str);
+}
+__setup("fail_futex=", setup_fail_futex);
+
+/*
+ * Decide whether a futex operation should have a failure injected.
+ *
+ * @fshared: true when the futex is shared, false for a private one.
+ *
+ * Returns false straight away when the 'ignore_private' knob is set and
+ * the futex is not shared; otherwise the verdict is left to should_fail().
+ *
+ * Only used within this file, hence static -- matching the stub that is
+ * built when CONFIG_FAIL_FUTEX is off.
+ */
+static bool should_fail_futex(bool fshared)
+{
+       if (fail_futex.ignore_private && !fshared)
+               return false;
+
+       return should_fail(&fail_futex.attr, 1);
+}
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+/*
+ * Create the debugfs interface for futex fault injection: a "fail_futex"
+ * directory from fault_create_debugfs_attr() plus an "ignore-private"
+ * boolean inside it, backed by fail_futex.ignore_private.
+ *
+ * Returns 0 on success, the error encoded in the directory pointer if
+ * the directory could not be created, or -ENOMEM (after removing the
+ * directory again) if the boolean file could not be created.
+ */
+static int __init fail_futex_debugfs(void)
+{
+       struct dentry *dir;
+
+       dir = fault_create_debugfs_attr("fail_futex", NULL,
+                                       &fail_futex.attr);
+       if (IS_ERR(dir))
+               return PTR_ERR(dir);
+
+       /* Regular file, readable and writable by its owner only. */
+       if (debugfs_create_bool("ignore-private",
+                               S_IFREG | S_IRUSR | S_IWUSR, dir,
+                               &fail_futex.ignore_private))
+               return 0;
+
+       /* Do not leave a half-populated directory behind. */
+       debugfs_remove_recursive(dir);
+       return -ENOMEM;
+}
+
+late_initcall(fail_futex_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+#else /* !CONFIG_FAIL_FUTEX */
+static inline bool should_fail_futex(bool fshared)
+{
+       return false;   /* fault injection not built in: never inject */
+}
+#endif /* CONFIG_FAIL_FUTEX */
+
 static inline void futex_get_mm(union futex_key *key)
 {
        atomic_inc(&key->private.mm->mm_count);
@@ -413,6 +474,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
        if (unlikely(!access_ok(rw, uaddr, sizeof(u32))))
                return -EFAULT;
 
+       if (unlikely(should_fail_futex(fshared)))
+               return -EFAULT;
+
        /*
         * PROCESS_PRIVATE futexes are fast.
         * As the mm cannot disappear under us and the 'key' only needs
@@ -428,6 +492,10 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
        }
 
 again:
+       /* Ignore any VERIFY_READ mapping (futex common case) */
+       if (unlikely(should_fail_futex(fshared)))
+               return -EFAULT;
+
        err = get_user_pages_fast(address, 1, 1, &page);
        /*
         * If write access is not required (eg. FUTEX_WAIT), try
@@ -516,7 +584,7 @@ again:
                 * A RO anonymous page will never change and thus doesn't make
                 * sense for futex operations.
                 */
-               if (ro) {
+               if (unlikely(should_fail_futex(fshared)) || ro) {
                        err = -EFAULT;
                        goto out;
                }
@@ -974,6 +1042,9 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval)
 {
        u32 uninitialized_var(curval);
 
+       if (unlikely(should_fail_futex(true)))
+               return -EFAULT;
+
        if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)))
                return -EFAULT;
 
@@ -1015,12 +1086,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
        if (get_futex_value_locked(&uval, uaddr))
                return -EFAULT;
 
+       if (unlikely(should_fail_futex(true)))
+               return -EFAULT;
+
        /*
         * Detect deadlocks.
         */
        if ((unlikely((uval & FUTEX_TID_MASK) == vpid)))
                return -EDEADLK;
 
+       if ((unlikely(should_fail_futex(true))))
+               return -EDEADLK;
+
        /*
         * Lookup existing state first. If it exists, try to attach to
         * its pi_state.
@@ -1155,6 +1232,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this,
         */
        newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
+       if (unlikely(should_fail_futex(true)))
+               ret = -EFAULT;
+
        if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
                ret = -EFAULT;
        else if (curval != uval)
@@ -1457,6 +1537,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
        if (get_futex_value_locked(&curval, pifutex))
                return -EFAULT;
 
+       if (unlikely(should_fail_futex(true)))
+               return -EFAULT;
+
        /*
         * Find the top_waiter and determine if there are additional waiters.
         * If the caller intends to requeue more than 1 waiter to pifutex,
@@ -2537,7 +2620,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
  * @uaddr:     the futex we initially wait on (non-pi)
  * @flags:     futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be
- *             the same type, no requeueing from private to shared, etc.
+ *             the same type, no requeueing from private to shared, etc.
  * @val:       the expected value of uaddr
  * @abs_time:  absolute timeout
  * @bitset:    32 bit wakeup bitset set by userspace, defaults to all
@@ -3012,6 +3095,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
        if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
                      cmd == FUTEX_WAIT_BITSET ||
                      cmd == FUTEX_WAIT_REQUEUE_PI)) {
+               if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
+                       return -EFAULT;
                if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
                        return -EFAULT;
                if (!timespec_valid(&ts))
index e2894b2..22554d6 100644 (file)
@@ -1542,6 +1542,13 @@ config FAIL_MMC_REQUEST
          and to test how the mmc host driver handles retries from
          the block device.
 
+config FAIL_FUTEX
+       bool "Fault-injection capability for futexes"
+       select DEBUG_FS
+       depends on FAULT_INJECTION && FUTEX
+       help
+         Provide fault-injection capability for futexes.
+
 config FAULT_INJECTION_DEBUG_FS
        bool "Debugfs entries for fault-injection capabilities"
        depends on FAULT_INJECTION && SYSFS && DEBUG_FS