Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 23 May 2012 18:06:59 +0000 (11:06 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 23 May 2012 18:06:59 +0000 (11:06 -0700)
Pull x86 mm changes from Ingo Molnar:
 "This tree includes a micro-optimization that avoids cr3 switches
  during idling; it fixes corner cases, and there are also small cleanups"

Fix up trivial context conflict with the percpu_xx -> this_cpu_xx
changes.
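
The headline micro-optimization is in leave_mm(): the CPU now reloads
cr3 (and so pays for a TLB flush) only if it is still listed in the old
mm's cpumask, which turns a repeated leave_mm() on an already-lazy CPU
into a no-op.  A minimal user-space model of that idea (all names below
are illustrative stand-ins, not the kernel's):

    #include <stdbool.h>
    #include <stdio.h>

    static bool cpu_in_mm_mask = true;  /* stand-in for mm_cpumask() membership */
    static unsigned long cr3_loads;     /* counts the costly page-table switches */

    static void load_cr3_swapper(void)
    {
            cr3_loads++;                /* models load_cr3(swapper_pg_dir) */
    }

    static void toy_leave_mm(void)
    {
            if (cpu_in_mm_mask) {       /* only switch while we still use the mm */
                    cpu_in_mm_mask = false;
                    load_cr3_swapper();
            }
            /* already lazy: skip the redundant cr3 write */
    }

    int main(void)
    {
            toy_leave_mm();
            toy_leave_mm();             /* second call is now a no-op */
            printf("cr3 loads: %lu\n", cr3_loads);  /* prints 1 */
            return 0;
    }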

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86-64: Fix accounting in kernel_physical_mapping_init()
  x86/tlb: Clean up and unify TLB_FLUSH_ALL definition
  x86: Drop obsolete ARCH_BOOTMEM support
  x86, tlb: Switch cr3 in leave_mm() only when needed
  x86/mm: Fix the size calculation of mapping tables

arch/x86/Kconfig
arch/x86/include/asm/tlbflush.h
arch/x86/mm/init.c
arch/x86/mm/init_64.c
arch/x86/mm/tlb.c

diff --combined arch/x86/Kconfig
@@@ -40,6 -40,7 +40,6 @@@ config X8
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_GRAPH_FP_TEST
        select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 -      select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_KVM
        select HAVE_ARCH_KGDB
@@@ -68,6 -69,7 +68,6 @@@
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_TEXT_POKE_SMP
        select HAVE_GENERIC_HARDIRQS
 -      select HAVE_SPARSE_IRQ
        select SPARSE_IRQ
        select GENERIC_FIND_FIRST_BIT
        select GENERIC_IRQ_PROBE
        select GENERIC_CLOCKEVENTS_MIN_ADJUST
        select IRQ_FORCED_THREADING
        select USE_GENERIC_SMP_HELPERS if SMP
 -      select HAVE_BPF_JIT if (X86_64 && NET)
 +      select HAVE_BPF_JIT if X86_64
        select CLKEVT_I8253
        select ARCH_HAVE_NMI_SAFE_CMPXCHG
        select GENERIC_IOMAP
 +      select DCACHE_WORD_ACCESS
 +      select GENERIC_SMP_IDLE_THREAD
 +      select HAVE_ARCH_SECCOMP_FILTER
 +      select BUILDTIME_EXTABLE_SORT
  
  config INSTRUCTION_DECODER
        def_bool (KPROBES || PERF_EVENTS)
@@@ -162,6 -160,9 +162,6 @@@ config RWSEM_GENERIC_SPINLOC
  config RWSEM_XCHGADD_ALGORITHM
        def_bool X86_XADD
  
 -config ARCH_HAS_CPU_IDLE_WAIT
 -      def_bool y
 -
  config GENERIC_CALIBRATE_DELAY
        def_bool y
  
@@@ -178,9 -179,6 +178,9 @@@ config ARCH_HAS_DEFAULT_IDL
  config ARCH_HAS_CACHE_LINE_SIZE
        def_bool y
  
 +config ARCH_HAS_CPU_AUTOPROBE
 +      def_bool y
 +
  config HAVE_SETUP_PER_CPU_AREA
        def_bool y
  
@@@ -400,7 -398,6 +400,7 @@@ config X86_INTEL_C
        select X86_REBOOTFIXUPS
        select OF
        select OF_EARLY_FLATTREE
 +      select IRQ_DOMAIN
        ---help---
          Select for the Intel CE media processor (CE4100) SOC.
          This option compiles in support for the CE4100 SOC for settop
@@@ -420,6 -417,27 +420,6 @@@ if X86_WANT_INTEL_MI
  config X86_INTEL_MID
        bool
  
 -config X86_MRST
 -       bool "Moorestown MID platform"
 -      depends on PCI
 -      depends on PCI_GOANY
 -      depends on X86_IO_APIC
 -      select X86_INTEL_MID
 -      select SFI
 -      select DW_APB_TIMER
 -      select APB_TIMER
 -      select I2C
 -      select SPI
 -      select INTEL_SCU_IPC
 -      select X86_PLATFORM_DEVICES
 -      ---help---
 -        Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
 -        Internet Device(MID) platform. Moorestown consists of two chips:
 -        Lincroft (CPU core, graphics, and memory controller) and Langwell IOH.
 -        Unlike standard x86 PCs, Moorestown does not have many legacy devices
 -        nor standard legacy replacement devices/features. e.g. Moorestown does
 -        not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
 -
  config X86_MDFLD
         bool "Medfield MID platform"
        depends on PCI
        select SPI
        select INTEL_SCU_IPC
        select X86_PLATFORM_DEVICES
 +      select MFD_INTEL_MSIC
        ---help---
          Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
          Internet Device(MID) platform. 
@@@ -1238,10 -1255,6 +1238,6 @@@ config NODES_SHIF
          Specify the maximum number of NUMA Nodes available on the target
          system.  Increases memory reserved to accommodate various tables.
  
- config HAVE_ARCH_BOOTMEM
-       def_bool y
-       depends on X86_32 && NUMA
  config HAVE_ARCH_ALLOC_REMAP
        def_bool y
        depends on X86_32 && NUMA
@@@ -2059,7 -2072,6 +2055,7 @@@ config OLP
        select GPIOLIB
        select OF
        select OF_PROMTREE
 +      select IRQ_DOMAIN
        ---help---
          Add support for detecting the unique features of the OLPC
          XO hardware.
@@@ -2117,19 -2129,6 +2113,19 @@@ config ALI
  
          Note: You have to set alix.force=1 for boards with Award BIOS.
  
 +config NET5501
 +      bool "Soekris Engineering net5501 System Support (LEDS, GPIO, etc)"
 +      select GPIOLIB
 +      ---help---
 +        This option enables system support for the Soekris Engineering net5501.
 +
 +config GEOS
 +      bool "Traverse Technologies GEOS System Support (LEDS, GPIO, etc)"
 +      select GPIOLIB
 +      depends on DMI
 +      ---help---
 +        This option enables system support for the Traverse Technologies GEOS.
 +
  endif # X86_32
  
  config AMD_NB
@@@ -2162,9 -2161,9 +2158,9 @@@ config IA32_EMULATIO
        depends on X86_64
        select COMPAT_BINFMT_ELF
        ---help---
 -        Include code to run 32-bit programs under a 64-bit kernel. You should
 -        likely turn this on, unless you're 100% sure that you don't have any
 -        32-bit programs left.
 +        Include code to run legacy 32-bit programs under a
 +        64-bit kernel. You should likely turn this on, unless you're
 +        100% sure that you don't have any 32-bit programs left.
  
  config IA32_AOUT
        tristate "IA32 a.out support"
        ---help---
          Support old a.out binaries in the 32bit emulation.
  
 +config X86_X32
 +      bool "x32 ABI for 64-bit mode (EXPERIMENTAL)"
 +      depends on X86_64 && IA32_EMULATION && EXPERIMENTAL
 +      ---help---
 +        Include code to run binaries for the x32 native 32-bit ABI
 +        for 64-bit processors.  An x32 process gets access to the
 +        full 64-bit register file and wide data path while leaving
 +        pointers at 32 bits for smaller memory footprint.
 +
 +        You will need a recent binutils (2.22 or later) with
 +        elf32_x86_64 support enabled to compile a kernel with this
 +        option set.
 +
  config COMPAT
        def_bool y
 -      depends on IA32_EMULATION
 +      depends on IA32_EMULATION || X86_X32
 +      select ARCH_WANT_OLD_COMPAT_IPC
  
  config COMPAT_FOR_U64_ALIGNMENT
        def_bool COMPAT
@@@ -5,7 -5,7 +5,7 @@@
  #include <linux/sched.h>
  
  #include <asm/processor.h>
 -#include <asm/system.h>
 +#include <asm/special_insns.h>
  
  #ifdef CONFIG_PARAVIRT
  #include <asm/paravirt.h>
@@@ -62,11 -62,7 +62,7 @@@ static inline void __flush_tlb_one(unsi
                __flush_tlb();
  }
  
- #ifdef CONFIG_X86_32
- # define TLB_FLUSH_ALL        0xffffffff
- #else
- # define TLB_FLUSH_ALL        -1ULL
- #endif
+ #define TLB_FLUSH_ALL -1UL
  
  /*
   * TLB flushing:
@@@ -156,8 -152,8 +152,8 @@@ DECLARE_PER_CPU_SHARED_ALIGNED(struct t
  
  static inline void reset_lazy_tlbstate(void)
  {
 -      percpu_write(cpu_tlbstate.state, 0);
 -      percpu_write(cpu_tlbstate.active_mm, &init_mm);
 +      this_cpu_write(cpu_tlbstate.state, 0);
 +      this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
  }
  
  #endif        /* SMP */
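
The unified definition works because converting -1 to unsigned long
yields the all-ones value at whatever width the type has on the build:
0xffffffff on 32-bit, 0xffffffffffffffff on 64-bit.  That matches both
of the old per-arch constants while using the same type as the flush_va
it is compared against.  A quick host-side check (plain user-space C,
not kernel code):

    #include <stdio.h>

    #define TLB_FLUSH_ALL -1UL

    int main(void)
    {
            /* -1 converted to unsigned long is all ones for the native
             * word width, so one definition covers both old variants. */
            printf("TLB_FLUSH_ALL = %#lx on a %zu-bit unsigned long\n",
                   TLB_FLUSH_ALL, sizeof(unsigned long) * 8);
            return 0;
    }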
diff --combined arch/x86/mm/init.c
@@@ -12,6 -12,7 +12,6 @@@
  #include <asm/page_types.h>
  #include <asm/sections.h>
  #include <asm/setup.h>
 -#include <asm/system.h>
  #include <asm/tlbflush.h>
  #include <asm/tlb.h>
  #include <asm/proto.h>
@@@ -29,8 -30,14 +29,14 @@@ int direct_gbpage
  #endif
  ;
  
- static void __init find_early_table_space(unsigned long end, int use_pse,
-                                         int use_gbpages)
+ struct map_range {
+       unsigned long start;
+       unsigned long end;
+       unsigned page_size_mask;
+ };
+ static void __init find_early_table_space(struct map_range *mr, unsigned long end,
+                                         int use_pse, int use_gbpages)
  {
        unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
        phys_addr_t base;
@@@ -55,6 -62,9 +61,9 @@@
  #ifdef CONFIG_X86_32
                extra += PMD_SIZE;
  #endif
+               /* The first 2/4M doesn't use large pages. */
+               extra += mr->end - mr->start;
                ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
        } else
                ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@@ -84,12 -94,6 +93,6 @@@ void __init native_pagetable_reserve(u6
        memblock_reserve(start, end - start);
  }
  
- struct map_range {
-       unsigned long start;
-       unsigned long end;
-       unsigned page_size_mask;
- };
  #ifdef CONFIG_X86_32
  #define NR_RANGE_MR 3
  #else /* CONFIG_X86_64 */
@@@ -261,7 -265,7 +264,7 @@@ unsigned long __init_refok init_memory_
         * nodes are discovered.
         */
        if (!after_bootmem)
-               find_early_table_space(end, use_pse, use_gbpages);
+               find_early_table_space(&mr[0], end, use_pse, use_gbpages);
  
        for (i = 0; i < nr_range; i++)
                ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
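
The size-calculation fix passes the first map_range into
find_early_table_space() because the head of memory below the first
2M/4M boundary is always mapped with 4K pages, so its PTE tables must be
budgeted even when PSE large pages cover everything else.  A simplified
model of that extra-PTE accounting (standalone sketch; the constants
mirror the patch, but head_range_ptes() itself is hypothetical):

    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    struct map_range {
            unsigned long start;
            unsigned long end;          /* head range that stays 4K-mapped */
    };

    /* 4K PTE slots the non-large-page head range adds, on top of
     * whatever the large-page tail of the mapping needs. */
    static unsigned long head_range_ptes(const struct map_range *mr)
    {
            unsigned long extra = mr->end - mr->start;
            return (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
    }

    int main(void)
    {
            struct map_range mr = { .start = 0, .end = 2UL << 20 }; /* first 2M */
            printf("extra PTEs for the head range: %lu\n", head_range_ptes(&mr));
            return 0;
    }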
diff --combined arch/x86/mm/init_64.c
@@@ -35,6 -35,7 +35,6 @@@
  
  #include <asm/processor.h>
  #include <asm/bios_ebda.h>
 -#include <asm/system.h>
  #include <asm/uaccess.h>
  #include <asm/pgtable.h>
  #include <asm/pgalloc.h>
@@@ -407,12 -408,12 +407,12 @@@ static unsigned long __memini
  phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
              unsigned long page_size_mask, pgprot_t prot)
  {
-       unsigned long pages = 0;
+       unsigned long pages = 0, next;
        unsigned long last_map_addr = end;
  
        int i = pmd_index(address);
  
-       for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
+       for (; i < PTRS_PER_PMD; i++, address = next) {
                unsigned long pte_phys;
                pmd_t *pmd = pmd_page + pmd_index(address);
                pte_t *pte;
                        break;
                }
  
+               next = (address & PMD_MASK) + PMD_SIZE;
                if (pmd_val(*pmd)) {
                        if (!pmd_large(*pmd)) {
                                spin_lock(&init_mm.page_table_lock);
                         * attributes.
                         */
                        if (page_size_mask & (1 << PG_LEVEL_2M)) {
-                               pages++;
+                               last_map_addr = next;
                                continue;
                        }
                        new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
                                pfn_pte(address >> PAGE_SHIFT,
                                        __pgprot(pgprot_val(prot) | _PAGE_PSE)));
                        spin_unlock(&init_mm.page_table_lock);
-                       last_map_addr = (address & PMD_MASK) + PMD_SIZE;
+                       last_map_addr = next;
                        continue;
                }
  
@@@ -482,11 -485,11 +484,11 @@@ static unsigned long __memini
  phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
                         unsigned long page_size_mask)
  {
-       unsigned long pages = 0;
+       unsigned long pages = 0, next;
        unsigned long last_map_addr = end;
        int i = pud_index(addr);
  
-       for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
+       for (; i < PTRS_PER_PUD; i++, addr = next) {
                unsigned long pmd_phys;
                pud_t *pud = pud_page + pud_index(addr);
                pmd_t *pmd;
                if (addr >= end)
                        break;
  
-               if (!after_bootmem &&
-                               !e820_any_mapped(addr, addr+PUD_SIZE, 0)) {
+               next = (addr & PUD_MASK) + PUD_SIZE;
+               if (!after_bootmem && !e820_any_mapped(addr, next, 0)) {
                        set_pud(pud, __pud(0));
                        continue;
                }
                         * attributes.
                         */
                        if (page_size_mask & (1 << PG_LEVEL_1G)) {
-                               pages++;
+                               last_map_addr = next;
                                continue;
                        }
                        prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
                        set_pte((pte_t *)pud,
                                pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
                        spin_unlock(&init_mm.page_table_lock);
-                       last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
+                       last_map_addr = next;
                        continue;
                }
  
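The accounting fix shows up in both phys_pmd_init() and phys_pud_init():
when an entry is already large-mapped with acceptable attributes, the
old loop bumped the new-pages counter (inflating the "mapped" statistics)
while never advancing last_map_addr past the existing mapping.  A toy
walk over 2M steps showing the corrected bookkeeping (self-contained
model, not the kernel loop itself):

    #include <stdio.h>

    #define PMD_SHIFT 21
    #define PMD_SIZE  (1UL << PMD_SHIFT)
    #define PMD_MASK  (~(PMD_SIZE - 1))

    int main(void)
    {
            unsigned long start = 0, end = 4 * PMD_SIZE;
            unsigned long address, next, pages = 0, last_map_addr = end;
            int premapped[4] = { 1, 0, 0, 1 };  /* slots already large-mapped */
            int i = 0;

            for (address = start; address < end; address = next, i++) {
                    next = (address & PMD_MASK) + PMD_SIZE;
                    if (premapped[i]) {
                            last_map_addr = next;   /* was: pages++; */
                            continue;
                    }
                    pages++;                        /* a genuinely new 2M mapping */
                    last_map_addr = next;
            }
            printf("new 2M pages: %lu, last_map_addr: %#lx\n",
                   pages, last_map_addr);           /* 2 and 0x800000 */
            return 0;
    }
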
diff --combined arch/x86/mm/tlb.c
@@@ -61,11 -61,13 +61,13 @@@ static DEFINE_PER_CPU_READ_MOSTLY(int, 
   */
  void leave_mm(int cpu)
  {
 -      struct mm_struct *active_mm = percpu_read(cpu_tlbstate.active_mm);
 -      if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
++      struct mm_struct *active_mm = this_cpu_read(cpu_tlbstate.active_mm);
 +      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
                BUG();
-       cpumask_clear_cpu(cpu,
-                         mm_cpumask(this_cpu_read(cpu_tlbstate.active_mm)));
-       load_cr3(swapper_pg_dir);
+       if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
+               cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+               load_cr3(swapper_pg_dir);
+       }
  }
  EXPORT_SYMBOL_GPL(leave_mm);
  
@@@ -152,8 -154,8 +154,8 @@@ void smp_invalidate_interrupt(struct pt
                 * BUG();
                 */
  
 -      if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
 -              if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
 +      if (f->flush_mm == this_cpu_read(cpu_tlbstate.active_mm)) {
 +              if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
                        if (f->flush_va == TLB_FLUSH_ALL)
                                local_flush_tlb();
                        else
@@@ -322,7 -324,7 +324,7 @@@ void flush_tlb_page(struct vm_area_stru
  static void do_flush_tlb_all(void *info)
  {
        __flush_tlb_all();
 -      if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
 +      if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
                leave_mm(smp_processor_id());
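
Note how this path composes with the leave_mm() change above: a CPU in
TLBSTATE_LAZY has just flushed everything anyway, so leaving the mm here
mainly serves to clear the CPU from mm_cpumask() and stop further flush
IPIs, and the reworked leave_mm() makes the call safely idempotent.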
  }