x86: move fpu_counter into ARCH specific thread_struct
author     Vineet Gupta <Vineet.Gupta1@synopsys.com>
           Tue, 12 Nov 2013 23:08:46 +0000 (15:08 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Wed, 13 Nov 2013 03:09:13 +0000 (12:09 +0900)
Only a couple of arches (sh/x86) use fpu_counter in task_struct, so it can
be moved out into the arch-specific thread_struct, reducing the size of
task_struct for other arches.

Compile tested i386_defconfig + gcc 4.7.3
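
For reference, a minimal sketch of the resulting layout on x86 (only the relevant
members are shown, everything else elided; the sh side gets the equivalent
treatment in its own, analogous patch):

    /* include/linux/sched.h: task_struct no longer carries the counter */
    struct task_struct {
            ...
            struct thread_struct    thread;         /* arch-specific state */
    };

    /* arch/x86/include/asm/processor.h: the counter's new home */
    struct thread_struct {
            ...
            unsigned char           fpu_counter;
    };

All users change from tsk->fpu_counter to tsk->thread.fpu_counter, as the hunks
below do.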

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Paul Mundt <paul.mundt@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
arch/x86/include/asm/fpu-internal.h
arch/x86/include/asm/processor.h
arch/x86/kernel/i387.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/traps.c

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 4d0bda7..c49a613 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -365,7 +365,7 @@ static inline void drop_fpu(struct task_struct *tsk)
         * Forget coprocessor state..
         */
        preempt_disable();
-       tsk->fpu_counter = 0;
+       tsk->thread.fpu_counter = 0;
        __drop_fpu(tsk);
        clear_used_math();
        preempt_enable();
@@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
         * or if the past 5 consecutive context-switches used math.
         */
        fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
-                                            new->fpu_counter > 5);
+                                            new->thread.fpu_counter > 5);
        if (__thread_has_fpu(old)) {
                if (!__save_init_fpu(old))
                        cpu = ~0;
@@ -433,16 +433,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta
 
                /* Don't change CR0.TS if we just switch! */
                if (fpu.preload) {
-                       new->fpu_counter++;
+                       new->thread.fpu_counter++;
                        __thread_set_has_fpu(new);
                        prefetch(new->thread.fpu.state);
                } else if (!use_eager_fpu())
                        stts();
        } else {
-               old->fpu_counter = 0;
+               old->thread.fpu_counter = 0;
                old->thread.fpu.last_cpu = ~0;
                if (fpu.preload) {
-                       new->fpu_counter++;
+                       new->thread.fpu_counter++;
                        if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
                                fpu.preload = 0;
                        else
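
The switch-time heuristic itself is unchanged here; only the counter's home
moves. A standalone sketch of what the preload decision above computes
(should_preload_fpu() is a hypothetical wrapper for illustration;
tsk_used_math() and use_eager_fpu() are the existing helpers used in the hunk):

    /*
     * Preload the incoming task's FPU state at context switch if it has
     * used math and either eager FPU mode is on or the task used the FPU
     * on more than 5 consecutive switches; otherwise leave CR0.TS set and
     * let the first FPU instruction trap (#NM) restore the state lazily.
     */
    static inline bool should_preload_fpu(struct task_struct *new)
    {
            return tsk_used_math(new) &&
                   (use_eager_fpu() || new->thread.fpu_counter > 5);
    }
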
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 987c75e..7b034a4 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -488,6 +488,15 @@ struct thread_struct {
        unsigned long           iopl;
        /* Max allowed port in the bitmap, in bytes: */
        unsigned                io_bitmap_max;
+       /*
+        * fpu_counter contains the number of consecutive context switches
+        * that the FPU is used. If this is over a threshold, the lazy fpu
+        * saving becomes unlazy to save the trap. This is an unsigned char
+        * so that after 256 times the counter wraps and the behavior turns
+        * lazy again; this to deal with bursty apps that only use FPU for
+        * a short time
+        */
+       unsigned char fpu_counter;
 };
 
 /*
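
The unsigned char width is what the comment's wrap-around behaviour relies on:
after 256 consecutive FPU-using switches the counter wraps back to 0, so the
"> 5" test fails again until the count climbs back up. A tiny user-space
illustration of the wrap (not part of the patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned char fpu_counter = 0;

            for (int cs = 1; cs <= 257; cs++) {
                    fpu_counter++;  /* one more consecutive FPU-using switch */
                    if (cs == 6 || cs == 255 || cs == 256 || cs == 257)
                            printf("switch %3d: counter=%3u preload=%d\n",
                                   cs, (unsigned)fpu_counter, fpu_counter > 5);
            }
            return 0;
    }

This prints preload=1 at switches 6 and 255, then preload=0 at 256 and 257:
the behaviour turns lazy again, which is the intent for bursty FPU users
described in the comment.
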
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 5d576ab..e8368c6 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -100,7 +100,7 @@ void unlazy_fpu(struct task_struct *tsk)
                __save_init_fpu(tsk);
                __thread_fpu_end(tsk);
        } else
-               tsk->fpu_counter = 0;
+               tsk->thread.fpu_counter = 0;
        preempt_enable();
 }
 EXPORT_SYMBOL(unlazy_fpu);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c2ec1aa..6f1236c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -153,7 +153,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
                childregs->orig_ax = -1;
                childregs->cs = __KERNEL_CS | get_kernel_rpl();
                childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
-               p->fpu_counter = 0;
+               p->thread.fpu_counter = 0;
                p->thread.io_bitmap_ptr = NULL;
                memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
                return 0;
@@ -166,7 +166,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
        p->thread.ip = (unsigned long) ret_from_fork;
        task_user_gs(p) = get_user_gs(current_pt_regs());
 
-       p->fpu_counter = 0;
+       p->thread.fpu_counter = 0;
        p->thread.io_bitmap_ptr = NULL;
        tsk = current;
        err = -ENOMEM;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 45ab4d6..10fe4c1 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -163,7 +163,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
        p->thread.sp = (unsigned long) childregs;
        p->thread.usersp = me->thread.usersp;
        set_tsk_thread_flag(p, TIF_FORK);
-       p->fpu_counter = 0;
+       p->thread.fpu_counter = 0;
        p->thread.io_bitmap_ptr = NULL;
 
        savesegment(gs, p->thread.gsindex);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 729aa77..996ce23 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -653,7 +653,7 @@ void math_state_restore(void)
                return;
        }
 
-       tsk->fpu_counter++;
+       tsk->thread.fpu_counter++;
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
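
Taken together, the hunks cover the counter's whole lifecycle at its new
address tsk->thread.fpu_counter: it is incremented on every FPU restore (the
preload path in switch_fpu_prepare() and the #NM path in math_state_restore()
above), and reset to zero when the state is dropped in drop_fpu(), when a task
is switched out without the FPU loaded, when unlazy_fpu() finds the FPU not in
use, and for each new child in copy_thread() on both 32-bit and 64-bit.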