Merge tag 'v4.0-rc5' into x86/fpu, to prevent conflicts
author Ingo Molnar <mingo@kernel.org>
Mon, 23 Mar 2015 09:13:36 +0000 (10:13 +0100)
committer Ingo Molnar <mingo@kernel.org>
Mon, 23 Mar 2015 09:13:36 +0000 (10:13 +0100)
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/include/asm/fpu-internal.h
arch/x86/kernel/i387.c
arch/x86/kernel/process.c
arch/x86/kernel/traps.c
arch/x86/kernel/xsave.c

arch/x86/include/asm/fpu-internal.h
index 72ba21a..810f20f 100644
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
 static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
 #endif
 
+/*
+ * Must be run with preemption disabled: this clears the fpu_owner_task
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, its state will still be saved.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+       per_cpu(fpu_owner_task, cpu) = NULL;
+}
+
+/*
+ * Used to indicate that the FPU state in memory is newer than the FPU
+ * state in registers, and the FPU state should be reloaded next time the
+ * task is run. Only safe on the current task, or non-running tasks.
+ */
+static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
+{
+       tsk->thread.fpu.last_cpu = ~0;
+}
+
+static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
+{
+       return new == this_cpu_read_stable(fpu_owner_task) &&
+               cpu == new->thread.fpu.last_cpu;
+}
+
 static inline int is_ia32_compat_frame(void)
 {
        return config_enabled(CONFIG_IA32_EMULATION) &&
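
The three helpers added above track FPU register ownership with two pieces of
state: the per-CPU fpu_owner_task pointer and the owning task's fpu.last_cpu
field; fpu_lazy_restore() skips the register reload only when both still match.
A minimal userspace sketch of that invariant (the toy names fpu_owner,
lazy_restore_ok and friends are illustrative stand-ins, not kernel API):

    #include <assert.h>
    #include <stddef.h>

    /* One task and two "CPUs": registers may be reused lazily only if this
     * CPU still names the task as owner AND the task last ran here. */
    struct toy_task { int last_cpu; };
    static struct toy_task *fpu_owner[2];          /* ~ per_cpu(fpu_owner_task) */

    static void cpu_disable_lazy_restore(int cpu)  /* ~ __cpu_disable_lazy_restore() */
    {
            fpu_owner[cpu] = NULL;
    }

    static void task_disable_lazy_restore(struct toy_task *t) /* ~ task_disable_lazy_fpu_restore() */
    {
            t->last_cpu = -1;
    }

    static int lazy_restore_ok(struct toy_task *t, int cpu)   /* ~ fpu_lazy_restore() */
    {
            return t == fpu_owner[cpu] && cpu == t->last_cpu;
    }

    int main(void)
    {
            struct toy_task t = { .last_cpu = 0 };

            fpu_owner[0] = &t;
            assert(lazy_restore_ok(&t, 0));        /* both sides still match */

            task_disable_lazy_restore(&t);         /* memory copy is now newer */
            assert(!lazy_restore_ok(&t, 0));

            t.last_cpu = 0;
            cpu_disable_lazy_restore(0);           /* another task used this CPU's FPU */
            assert(!lazy_restore_ok(&t, 0));
            return 0;
    }
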
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)
 
 static inline void fx_finit(struct i387_fxsave_struct *fx)
 {
-       memset(fx, 0, xstate_size);
        fx->cwd = 0x37f;
        fx->mxcsr = MXCSR_DEFAULT;
 }
@@ -400,24 +427,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
  */
 typedef struct { int preload; } fpu_switch_t;
 
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
-       per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
-       return new == this_cpu_read_stable(fpu_owner_task) &&
-               cpu == new->thread.fpu.last_cpu;
-}
-
 static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
 {
        fpu_switch_t fpu;
@@ -426,13 +435,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
         * If the task has used the FPU, pre-load it: always on xsave (eager FPU)
         * processors, or if the past 5 consecutive context switches used the FPU.
         */
-       fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-                                            new->thread.fpu_counter > 5);
+       fpu.preload = tsk_used_math(new) &&
+                     (use_eager_fpu() || new->thread.fpu_counter > 5);
+
        if (__thread_has_fpu(old)) {
                if (!__save_init_fpu(old))
-                       cpu = ~0;
-               old->thread.fpu.last_cpu = cpu;
-               old->thread.fpu.has_fpu = 0;    /* But leave fpu_owner_task! */
+                       task_disable_lazy_fpu_restore(old);
+               else
+                       old->thread.fpu.last_cpu = cpu;
+
+               /* But leave fpu_owner_task! */
+               old->thread.fpu.has_fpu = 0;
 
                /* Don't change CR0.TS if we just switch! */
                if (fpu.preload) {
@@ -443,10 +456,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
                        stts();
        } else {
                old->thread.fpu_counter = 0;
-               old->thread.fpu.last_cpu = ~0;
+               task_disable_lazy_fpu_restore(old);
                if (fpu.preload) {
                        new->thread.fpu_counter++;
-                       if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
+                       if (fpu_lazy_restore(new, cpu))
                                fpu.preload = 0;
                        else
                                prefetch(new->thread.fpu.state);
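
In the hunk above, the departing task's last_cpu is now recorded only when
__save_init_fpu() reports that the register contents survived the save;
otherwise its lazy restore is disabled outright, so a later fpu_lazy_restore()
cannot reuse stale registers. A compilable toy sketch of that bookkeeping plus
the preload heuristic (all names below are illustrative stand-ins, not the
kernel functions):

    #include <assert.h>

    struct toy_fpu { int last_cpu; };

    /* stand-in for __save_init_fpu(): nonzero means the registers still hold
     * the task's state after it was written back to memory */
    static int save_init_fpu(int regs_survive) { return regs_survive; }

    static void switch_out(struct toy_fpu *old, int cpu, int regs_survive)
    {
            if (!save_init_fpu(regs_survive))
                    old->last_cpu = -1;    /* ~ task_disable_lazy_fpu_restore() */
            else
                    old->last_cpu = cpu;   /* registers on this CPU stay reusable */
    }

    /* ~ the fpu.preload decision: preload if the task has FPU state and we are
     * either in eager mode or it used the FPU in more than 5 consecutive switches */
    static int should_preload(int used_math, int eager, unsigned char fpu_counter)
    {
            return used_math && (eager || fpu_counter > 5);
    }

    int main(void)
    {
            struct toy_fpu t = { .last_cpu = -1 };

            switch_out(&t, 2, 1);
            assert(t.last_cpu == 2);       /* lazy restore on CPU 2 remains possible */

            switch_out(&t, 2, 0);
            assert(t.last_cpu == -1);      /* registers were clobbered: no lazy restore */

            assert(should_preload(1, 0, 6) && !should_preload(1, 0, 3));
            return 0;
    }
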
@@ -519,24 +532,6 @@ static inline void __save_fpu(struct task_struct *tsk)
                fpu_fxsave(&tsk->thread.fpu);
 }
 
-/*
- * These disable preemption on their own and are safe
- */
-static inline void save_init_fpu(struct task_struct *tsk)
-{
-       WARN_ON_ONCE(!__thread_has_fpu(tsk));
-
-       if (use_eager_fpu()) {
-               __save_fpu(tsk);
-               return;
-       }
-
-       preempt_disable();
-       __save_init_fpu(tsk);
-       __thread_fpu_end(tsk);
-       preempt_enable();
-}
-
 /*
  * i387 state interaction
  */
arch/x86/kernel/i387.c
index d5651fc..29e982a 100644
@@ -42,8 +42,8 @@ void kernel_fpu_enable(void)
  * be set (so that the clts/stts pair does nothing that is
  * visible in the interrupted kernel thread).
  *
- * Except for the eagerfpu case when we return 1 unless we've already
- * been eager and saved the state in kernel_fpu_begin().
+ * Except for the eagerfpu case: there we return true, since in the likely
+ * case the thread already has the FPU and we will not set/clear TS anyway.
  */
 static inline bool interrupted_kernel_fpu_idle(void)
 {
@@ -51,7 +51,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
                return false;
 
        if (use_eager_fpu())
-               return __thread_has_fpu(current);
+               return true;
 
        return !__thread_has_fpu(current) &&
                (read_cr0() & X86_CR0_TS);
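
The rewritten predicate above means that with eager FPU an interrupted context
can always tolerate kernel FPU use, while with lazy FPU it is only safe when
the interrupted thread does not own the registers and CR0.TS is set, so the
clts()/stts() pair in kernel_fpu_begin()/end() stays invisible to it. A small
self-contained sketch of that logic (plain ints stand in for the kernel's
per-CPU and CR0 state):

    #include <assert.h>

    static int irq_fpu_idle(int in_kernel_fpu, int eager, int thread_has_fpu,
                            int cr0_ts)
    {
            if (in_kernel_fpu)
                    return 0;              /* already inside a kernel FPU section */
            if (eager)
                    return 1;              /* eager: state is always saved/restorable */
            return !thread_has_fpu && cr0_ts;
    }

    int main(void)
    {
            assert(irq_fpu_idle(0, 1, 1, 0));   /* eager: always usable */
            assert(!irq_fpu_idle(0, 0, 1, 0));  /* lazy: would clobber the owner */
            assert(irq_fpu_idle(0, 0, 0, 1));   /* lazy: FPU idle and TS set */
            return 0;
    }
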
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)
 
        if (__thread_has_fpu(me)) {
                __save_init_fpu(me);
-       } else if (!use_eager_fpu()) {
+       } else {
                this_cpu_write(fpu_owner_task, NULL);
-               clts();
+               if (!use_eager_fpu())
+                       clts();
        }
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
 {
        preempt_disable();
        if (__thread_has_fpu(tsk)) {
-               __save_init_fpu(tsk);
-               __thread_fpu_end(tsk);
-       } else
-               tsk->thread.fpu_counter = 0;
+               if (use_eager_fpu()) {
+                       __save_fpu(tsk);
+               } else {
+                       __save_init_fpu(tsk);
+                       __thread_fpu_end(tsk);
+               }
+       }
        preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
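
The new unlazy_fpu() above distinguishes the two modes: in eager mode the state
is only written back to memory and the task keeps owning the registers, while
in lazy mode it is saved and ownership is dropped (so TS gets set again). A toy
model of those two paths (the stub functions are placeholders, not kernel calls):

    #include <stdio.h>

    static void save_to_memory(void)  { puts("state written back to memory"); }
    static void drop_registers(void)  { puts("register ownership dropped, TS set"); }

    static void toy_unlazy_fpu(int thread_has_fpu, int eager)
    {
            if (!thread_has_fpu)
                    return;                /* nothing live in the registers */
            save_to_memory();              /* ~ __save_fpu() / __save_init_fpu() */
            if (!eager)
                    drop_registers();      /* ~ __thread_fpu_end() */
    }

    int main(void)
    {
            toy_unlazy_fpu(1, 1);          /* eager: save only */
            toy_unlazy_fpu(1, 0);          /* lazy: save and release */
            return 0;
    }
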
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
                return;
        }
 
+       memset(fpu->state, 0, xstate_size);
+
        if (cpu_has_fxsr) {
                fx_finit(&fpu->state->fxsave);
        } else {
                struct i387_fsave_struct *fp = &fpu->state->fsave;
-               memset(fp, 0, xstate_size);
                fp->cwd = 0xffff037fu;
                fp->swd = 0xffff0000u;
                fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
        if (tsk_used_math(tsk)) {
                if (cpu_has_fpu && tsk == current)
                        unlazy_fpu(tsk);
-               tsk->thread.fpu.last_cpu = ~0;
+               task_disable_lazy_fpu_restore(tsk);
                return 0;
        }
 
@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
                unsigned int pos, unsigned int count,
                void *kbuf, void __user *ubuf)
 {
+       struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
        int ret;
 
        if (!cpu_has_xsave)
@@ -350,14 +356,12 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
         * memory layout in the thread struct, so that we can copy the entire
         * xstateregs to the user using one user_regset_copyout().
         */
-       memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
-              xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
-
+       memcpy(&xsave->i387.sw_reserved,
+               xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
        /*
         * Copy the xstate memory layout.
         */
-       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-                                 &target->thread.fpu.state->xsave, 0, -1);
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
        return ret;
 }
 
@@ -365,8 +369,8 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
                  unsigned int pos, unsigned int count,
                  const void *kbuf, const void __user *ubuf)
 {
+       struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
        int ret;
-       struct xsave_hdr_struct *xsave_hdr;
 
        if (!cpu_has_xsave)
                return -ENODEV;
@@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
        if (ret)
                return ret;
 
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-                                &target->thread.fpu.state->xsave, 0, -1);
-
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
        /*
         * mxcsr reserved bits must be masked to zero for security reasons.
         */
-       target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
-
-       xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;
-
-       xsave_hdr->xstate_bv &= pcntxt_mask;
+       xsave->i387.mxcsr &= mxcsr_feature_mask;
+       xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
        /*
         * These bits must be zero.
         */
-       memset(xsave_hdr->reserved, 0, 48);
-
+       memset(&xsave->xsave_hdr.reserved, 0, 48);
        return ret;
 }
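
xstateregs_set() above sanitizes whatever userspace handed in through ptrace:
reserved MXCSR bits and unsupported xstate_bv bits are masked off, and the
reserved header bytes are zeroed. A stand-alone sketch of that sanitizing step
(the struct layout and the mask values are simplified examples, not the
kernel's real xsave_struct):

    #include <stdint.h>
    #include <string.h>

    struct toy_xsave_hdr {
            uint64_t xstate_bv;
            uint8_t  reserved[48];
    };

    struct toy_xsave {
            uint32_t             mxcsr;
            struct toy_xsave_hdr hdr;
    };

    static void sanitize_xsave(struct toy_xsave *x,
                               uint32_t mxcsr_feature_mask,
                               uint64_t supported_xfeatures)
    {
            x->mxcsr         &= mxcsr_feature_mask;   /* reserved bits must be 0 */
            x->hdr.xstate_bv &= supported_xfeatures;  /* only advertised features */
            memset(x->hdr.reserved, 0, sizeof(x->hdr.reserved));
    }

    int main(void)
    {
            struct toy_xsave x = { .mxcsr = ~0u, .hdr = { .xstate_bv = ~0ull } };

            sanitize_xsave(&x, 0xffbfu, 0x7ull);      /* example masks only */
            return 0;
    }
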
 
arch/x86/kernel/process.c
index 046e2d6..dcaf4b0 100644
@@ -69,8 +69,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        dst->thread.fpu_counter = 0;
        dst->thread.fpu.has_fpu = 0;
-       dst->thread.fpu.last_cpu = ~0;
        dst->thread.fpu.state = NULL;
+       task_disable_lazy_fpu_restore(dst);
        if (tsk_used_math(src)) {
                int err = fpu_alloc(&dst->thread.fpu);
                if (err)
@@ -131,6 +131,7 @@ void flush_thread(void)
 
        flush_ptrace_hw_breakpoint(tsk);
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
+
        drop_init_fpu(tsk);
        /*
         * Free the FPU state for non xsave platforms. They get reallocated
@@ -138,6 +139,12 @@ void flush_thread(void)
         */
        if (!use_eager_fpu())
                free_thread_xstate(tsk);
+       else if (!used_math()) {
+               /* kthread execs. TODO: cleanup this horror. */
+               if (WARN_ON(init_fpu(current)))
+                       force_sig(SIGKILL, current);
+               math_state_restore();
+       }
 }
 
 static void hard_disable_TSC(void)
arch/x86/kernel/traps.c
index 4ff5d16..7ee7369 100644
@@ -734,7 +734,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
        /*
         * Save the info for the exception handler and clear the error.
         */
-       save_init_fpu(task);
+       unlazy_fpu(task);
        task->thread.trap_nr = trapnr;
        task->thread.error_code = error_code;
        info.si_signo = SIGFPE;
arch/x86/kernel/xsave.c
index cdc6cf9..0bf82c5 100644
@@ -690,7 +690,7 @@ void eager_fpu_init(void)
 {
        static __refdata void (*boot_func)(void) = eager_fpu_init_bp;
 
-       clear_used_math();
+       WARN_ON(used_math());
        current_thread_info()->status = 0;
 
        if (eagerfpu == ENABLE)
@@ -705,17 +705,6 @@ void eager_fpu_init(void)
                boot_func();
                boot_func = NULL;
        }
-
-       /*
-        * This is same as math_state_restore(). But use_xsave() is
-        * not yet patched to use math_state_restore().
-        */
-       init_fpu(current);
-       __thread_fpu_begin(current);
-       if (cpu_has_xsave)
-               xrstor_state(init_xstate_buf, -1);
-       else
-               fxrstor_checking(&init_xstate_buf->i387);
 }
 
 /*