seccomp: add "seccomp" syscall
[cascardo/linux.git] / kernel / seccomp.c
index f6d76be..f065257 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
 
@@ -54,8 +55,7 @@
 struct seccomp_filter {
        atomic_t usage;
        struct seccomp_filter *prev;
-       unsigned short len;  /* Instruction count */
-       struct sock_filter_int insnsi[];
+       struct sk_filter *prog;
 };
 
 /* Limit any path through the tree to 256KB worth of instructions. */
@@ -104,60 +104,59 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
                u32 k = ftest->k;
 
                switch (code) {
-               case BPF_S_LD_W_ABS:
+               case BPF_LD | BPF_W | BPF_ABS:
                        ftest->code = BPF_LDX | BPF_W | BPF_ABS;
                        /* 32-bit aligned and not out of bounds. */
                        if (k >= sizeof(struct seccomp_data) || k & 3)
                                return -EINVAL;
                        continue;
-               case BPF_S_LD_W_LEN:
+               case BPF_LD | BPF_W | BPF_LEN:
                        ftest->code = BPF_LD | BPF_IMM;
                        ftest->k = sizeof(struct seccomp_data);
                        continue;
-               case BPF_S_LDX_W_LEN:
+               case BPF_LDX | BPF_W | BPF_LEN:
                        ftest->code = BPF_LDX | BPF_IMM;
                        ftest->k = sizeof(struct seccomp_data);
                        continue;
                /* Explicitly include allowed calls. */
-               case BPF_S_RET_K:
-               case BPF_S_RET_A:
-               case BPF_S_ALU_ADD_K:
-               case BPF_S_ALU_ADD_X:
-               case BPF_S_ALU_SUB_K:
-               case BPF_S_ALU_SUB_X:
-               case BPF_S_ALU_MUL_K:
-               case BPF_S_ALU_MUL_X:
-               case BPF_S_ALU_DIV_X:
-               case BPF_S_ALU_AND_K:
-               case BPF_S_ALU_AND_X:
-               case BPF_S_ALU_OR_K:
-               case BPF_S_ALU_OR_X:
-               case BPF_S_ALU_XOR_K:
-               case BPF_S_ALU_XOR_X:
-               case BPF_S_ALU_LSH_K:
-               case BPF_S_ALU_LSH_X:
-               case BPF_S_ALU_RSH_K:
-               case BPF_S_ALU_RSH_X:
-               case BPF_S_ALU_NEG:
-               case BPF_S_LD_IMM:
-               case BPF_S_LDX_IMM:
-               case BPF_S_MISC_TAX:
-               case BPF_S_MISC_TXA:
-               case BPF_S_ALU_DIV_K:
-               case BPF_S_LD_MEM:
-               case BPF_S_LDX_MEM:
-               case BPF_S_ST:
-               case BPF_S_STX:
-               case BPF_S_JMP_JA:
-               case BPF_S_JMP_JEQ_K:
-               case BPF_S_JMP_JEQ_X:
-               case BPF_S_JMP_JGE_K:
-               case BPF_S_JMP_JGE_X:
-               case BPF_S_JMP_JGT_K:
-               case BPF_S_JMP_JGT_X:
-               case BPF_S_JMP_JSET_K:
-               case BPF_S_JMP_JSET_X:
-                       sk_decode_filter(ftest, ftest);
+               case BPF_RET | BPF_K:
+               case BPF_RET | BPF_A:
+               case BPF_ALU | BPF_ADD | BPF_K:
+               case BPF_ALU | BPF_ADD | BPF_X:
+               case BPF_ALU | BPF_SUB | BPF_K:
+               case BPF_ALU | BPF_SUB | BPF_X:
+               case BPF_ALU | BPF_MUL | BPF_K:
+               case BPF_ALU | BPF_MUL | BPF_X:
+               case BPF_ALU | BPF_DIV | BPF_K:
+               case BPF_ALU | BPF_DIV | BPF_X:
+               case BPF_ALU | BPF_AND | BPF_K:
+               case BPF_ALU | BPF_AND | BPF_X:
+               case BPF_ALU | BPF_OR | BPF_K:
+               case BPF_ALU | BPF_OR | BPF_X:
+               case BPF_ALU | BPF_XOR | BPF_K:
+               case BPF_ALU | BPF_XOR | BPF_X:
+               case BPF_ALU | BPF_LSH | BPF_K:
+               case BPF_ALU | BPF_LSH | BPF_X:
+               case BPF_ALU | BPF_RSH | BPF_K:
+               case BPF_ALU | BPF_RSH | BPF_X:
+               case BPF_ALU | BPF_NEG:
+               case BPF_LD | BPF_IMM:
+               case BPF_LDX | BPF_IMM:
+               case BPF_MISC | BPF_TAX:
+               case BPF_MISC | BPF_TXA:
+               case BPF_LD | BPF_MEM:
+               case BPF_LDX | BPF_MEM:
+               case BPF_ST:
+               case BPF_STX:
+               case BPF_JMP | BPF_JA:
+               case BPF_JMP | BPF_JEQ | BPF_K:
+               case BPF_JMP | BPF_JEQ | BPF_X:
+               case BPF_JMP | BPF_JGE | BPF_K:
+               case BPF_JMP | BPF_JGE | BPF_X:
+               case BPF_JMP | BPF_JGT | BPF_K:
+               case BPF_JMP | BPF_JGT | BPF_X:
+               case BPF_JMP | BPF_JSET | BPF_K:
+               case BPF_JMP | BPF_JSET | BPF_X:
                        continue;
                default:
                        return -EINVAL;
@@ -189,13 +188,30 @@ static u32 seccomp_run_filters(int syscall)
         * value always takes priority (ignoring the DATA).
         */
        for (f = current->seccomp.filter; f; f = f->prev) {
-               u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
+               u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+
                if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
                        ret = cur_ret;
        }
        return ret;
 }
+#endif /* CONFIG_SECCOMP_FILTER */
+
+static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
+{
+       if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
+               return false;
+
+       return true;
+}
 
+static inline void seccomp_assign_mode(unsigned long seccomp_mode)
+{
+       current->seccomp.mode = seccomp_mode;
+       set_tsk_thread_flag(current, TIF_SECCOMP);
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
 /**
  * seccomp_attach_filter: Attaches a seccomp filter to current.
  * @fprog: BPF program to install
@@ -215,7 +231,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
                return -EINVAL;
 
        for (filter = current->seccomp.filter; filter; filter = filter->prev)
-               total_insns += filter->len + 4;  /* include a 4 instr penalty */
+               total_insns += filter->prog->len + 4;  /* include a 4 instr penalty */
        if (total_insns > MAX_INSNS_PER_PATH)
                return -ENOMEM;
 
@@ -256,19 +272,25 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 
        /* Allocate a new seccomp_filter */
        ret = -ENOMEM;
-       filter = kzalloc(sizeof(struct seccomp_filter) +
-                        sizeof(struct sock_filter_int) * new_len,
+       filter = kzalloc(sizeof(struct seccomp_filter),
                         GFP_KERNEL|__GFP_NOWARN);
        if (!filter)
                goto free_prog;
 
-       ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
-       if (ret)
+       filter->prog = kzalloc(sk_filter_size(new_len),
+                              GFP_KERNEL|__GFP_NOWARN);
+       if (!filter->prog)
                goto free_filter;
+
+       ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
+       if (ret)
+               goto free_filter_prog;
        kfree(fp);
 
        atomic_set(&filter->usage, 1);
-       filter->len = new_len;
+       filter->prog->len = new_len;
+
+       sk_filter_select_runtime(filter->prog);
 
        /*
         * If there is an existing filter, make it the prev and don't drop its
@@ -278,6 +300,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
        current->seccomp.filter = filter;
        return 0;
 
+free_filter_prog:
+       kfree(filter->prog);
 free_filter:
        kfree(filter);
 free_prog:
@@ -291,7 +315,7 @@ free_prog:
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(char __user *user_filter)
+static long seccomp_attach_user_filter(const char __user *user_filter)
 {
        struct sock_fprog fprog;
        long ret = -EFAULT;
@@ -330,6 +354,7 @@ void put_seccomp_filter(struct task_struct *tsk)
        while (orig && atomic_dec_and_test(&orig->usage)) {
                struct seccomp_filter *freeme = orig;
                orig = orig->prev;
+               sk_filter_free(freeme->prog);
                kfree(freeme);
        }
 }
@@ -465,47 +490,126 @@ long prctl_get_seccomp(void)
 }
 
 /**
- * prctl_set_seccomp: configures current->seccomp.mode
- * @seccomp_mode: requested mode to use
- * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ * seccomp_set_mode_strict: internal function for setting strict seccomp
+ *
+ * Once current->seccomp.mode is non-zero, it may not be changed.
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+static long seccomp_set_mode_strict(void)
+{
+       const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
+       long ret = -EINVAL;
+
+       if (!seccomp_may_assign_mode(seccomp_mode))
+               goto out;
+
+#ifdef TIF_NOTSC
+       disable_TSC();
+#endif
+       seccomp_assign_mode(seccomp_mode);
+       ret = 0;
+
+out:
+
+       return ret;
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
+/**
+ * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @flags:  flags to change filter behavior
+ * @filter: struct sock_fprog containing filter
  *
- * This function may be called repeatedly with a @seccomp_mode of
- * SECCOMP_MODE_FILTER to install additional filters.  Every filter
- * successfully installed will be evaluated (in reverse order) for each system
- * call the task makes.
+ * This function may be called repeatedly to install additional filters.
+ * Every filter successfully installed will be evaluated (in reverse order)
+ * for each system call the task makes.
  *
  * Once current->seccomp.mode is non-zero, it may not be changed.
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode_filter(unsigned int flags,
+                                   const char __user *filter)
 {
+       const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
        long ret = -EINVAL;
 
-       if (current->seccomp.mode &&
-           current->seccomp.mode != seccomp_mode)
+       /* Validate flags. */
+       if (flags != 0)
+               goto out;
+
+       if (!seccomp_may_assign_mode(seccomp_mode))
                goto out;
 
+       ret = seccomp_attach_user_filter(filter);
+       if (ret)
+               goto out;
+
+       seccomp_assign_mode(seccomp_mode);
+out:
+       return ret;
+}
+#else
+static inline long seccomp_set_mode_filter(unsigned int flags,
+                                          const char __user *filter)
+{
+       return -EINVAL;
+}
+#endif
+
+/* Common entry point for both prctl and syscall. */
+static long do_seccomp(unsigned int op, unsigned int flags,
+                      const char __user *uargs)
+{
+       switch (op) {
+       case SECCOMP_SET_MODE_STRICT:
+               if (flags != 0 || uargs != NULL)
+                       return -EINVAL;
+               return seccomp_set_mode_strict();
+       case SECCOMP_SET_MODE_FILTER:
+               return seccomp_set_mode_filter(flags, uargs);
+       default:
+               return -EINVAL;
+       }
+}
+
+SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
+                        const char __user *, uargs)
+{
+       return do_seccomp(op, flags, uargs);
+}
+
+/**
+ * prctl_set_seccomp: configures current->seccomp.mode
+ * @seccomp_mode: requested mode to use
+ * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+{
+       unsigned int op;
+       char __user *uargs;
+
        switch (seccomp_mode) {
        case SECCOMP_MODE_STRICT:
-               ret = 0;
-#ifdef TIF_NOTSC
-               disable_TSC();
-#endif
+               op = SECCOMP_SET_MODE_STRICT;
+               /*
+                * Setting strict mode through prctl always ignored filter,
+                * so make sure it is always NULL here to pass the internal
+                * check in do_seccomp().
+                */
+               uargs = NULL;
                break;
-#ifdef CONFIG_SECCOMP_FILTER
        case SECCOMP_MODE_FILTER:
-               ret = seccomp_attach_user_filter(filter);
-               if (ret)
-                       goto out;
+               op = SECCOMP_SET_MODE_FILTER;
+               uargs = filter;
                break;
-#endif
        default:
-               goto out;
+               return -EINVAL;
        }
 
-       current->seccomp.mode = seccomp_mode;
-       set_thread_flag(TIF_SECCOMP);
-out:
-       return ret;
+       /* prctl interface doesn't have flags, so they are always zero. */
+       return do_seccomp(op, 0, uargs);
 }