Merge branches 'x86/urgent', 'x86/amd-iommu', 'x86/apic', 'x86/cleanups', 'x86/core...
authorIngo Molnar <mingo@elte.hu>
Mon, 21 Jul 2008 14:37:17 +0000 (16:37 +0200)
committerIngo Molnar <mingo@elte.hu>
Mon, 21 Jul 2008 14:37:17 +0000 (16:37 +0200)
26 files changed:
1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20 
Documentation/kernel-parameters.txt
arch/x86/Kconfig.debug
arch/x86/ia32/ia32entry.S
arch/x86/kernel/amd_iommu.c
arch/x86/kernel/amd_iommu_init.c
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/cpu/common_64.c
arch/x86/kernel/early-quirks.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/nmi.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/pci-dma.c
arch/x86/kernel/pci-gart_64.c
arch/x86/kernel/process.c
arch/x86/kernel/setup.c
arch/x86/kernel/signal_32.c
arch/x86/kernel/smpboot.c
arch/x86/mm/init_32.c
arch/x86/mm/pat.c
arch/x86/pci/pci.h
arch/x86/xen/enlighten.c
drivers/pci/intel-iommu.c
include/asm-x86/paravirt.h
include/asm-x86/setup.h

Simple merge
Simple merge
                        .quad 1b,ia32_badarg
                        .previous       
                        GET_THREAD_INFO(%r10)
          -             orl    $TS_COMPAT,threadinfo_status(%r10)
          -             testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl    $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----             TI_flags(%r10)
+++++++++++++++ ++++    testl  $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        CFI_REMEMBER_STATE
                        jnz  sysenter_tracesys
--------------- ----sysenter_do_call:   
                        cmpl    $(IA32_NR_syscalls-1),%eax
                        ja      ia32_badsys
+++++++++++++++ ++++sysenter_do_call:
                        IA32_ARG_FIXUP 1
                        call    *ia32_sys_call_table(,%rax,8)
                        movq    %rax,RAX-ARGOFFSET(%rsp)
@@@@@@@@@@@@@@@@@@@@@ -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -241,9 -230,8 -241,9 -241,9 -241,9 -241,9 -244,8 -241,9 -241,9 -241,9 -241,9 +244,8 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_cstar_target
                        .quad 1b,ia32_badarg
                        .previous       
                        GET_THREAD_INFO(%r10)
          -             orl   $TS_COMPAT,threadinfo_status(%r10)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl   $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%r10)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        CFI_REMEMBER_STATE
                        jnz   cstar_tracesys
                    cstar_do_call:      
@@@@@@@@@@@@@@@@@@@@@ -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -321,7 -310,7 -321,7 -321,7 -321,7 -321,7 -323,7 -321,7 -321,7 -321,7 -321,8 +323,8 @@@@@@@@@@@@@@@@@@@@@ ENTRY(ia32_syscall
                        /*CFI_REL_OFFSET        rflags,EFLAGS-RIP*/
                        /*CFI_REL_OFFSET        cs,CS-RIP*/
                        CFI_REL_OFFSET  rip,RIP-RIP
          -             swapgs
+++++++++++++++++++     PARAVIRT_ADJUST_EXCEPTION_FRAME
          +             SWAPGS
                        /*
                         * No need to follow this irqs on/off section: the syscall
                         * disabled irqs and here we enable it straight after entry:
                           this could be a problem. */
                        SAVE_ARGS 0,0,1
                        GET_THREAD_INFO(%r10)
          -             orl   $TS_COMPAT,threadinfo_status(%r10)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
          +             orl   $TS_COMPAT,TI_status(%r10)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%r10)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
                        jnz ia32_tracesys
                    ia32_do_syscall:    
                        cmpl $(IA32_NR_syscalls-1),%eax
index f2766d8,8c3deb0,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,cf2f74b,f2766d8,f2766d8,0000000,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8,f2766d8..c25210e
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
------- -- ---------#include <asm/gart.h>
          +         /*
          +          * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
          +          * Author: Joerg Roedel <joerg.roedel@amd.com>
          +          *         Leo Duran <leo.duran@amd.com>
          +          *
          +          * This program is free software; you can redistribute it and/or modify it
          +          * under the terms of the GNU General Public License version 2 as published
          +          * by the Free Software Foundation.
          +          *
          +          * This program is distributed in the hope that it will be useful,
          +          * but WITHOUT ANY WARRANTY; without even the implied warranty of
          +          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
          +          * GNU General Public License for more details.
          +          *
          +          * You should have received a copy of the GNU General Public License
          +          * along with this program; if not, write to the Free Software
          +          * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
          +          */
          +         
          +         #include <linux/pci.h>
          +         #include <linux/gfp.h>
          +         #include <linux/bitops.h>
          +         #include <linux/scatterlist.h>
          +         #include <linux/iommu-helper.h>
          +         #include <asm/proto.h>
- -------- ---------struct command {
+++++++ ++++++++++++#include <asm/iommu.h>
          +         #include <asm/amd_iommu_types.h>
          +         #include <asm/amd_iommu.h>
          +         
          +         #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
          +         
          +         #define to_pages(addr, size) \
          +              (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
          +         
+ ++++++++++++++++++#define EXIT_LOOP_COUNT 10000000
+ ++++++++++++++++++
          +         static DEFINE_RWLOCK(amd_iommu_devtable_lock);
          +         
- -------- ---------static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * general struct to manage commands send to an IOMMU
+ ++++++++++++++++++ */
+ ++++++++++++++++++struct iommu_cmd {
          +             u32 data[4];
          +         };
          +         
          +         static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
          +                                  struct unity_map_entry *e);
          +         
+ ++++++++++++++++++/* returns !0 if the IOMMU is caching non-present entries in its TLB */
          +         static int iommu_has_npcache(struct amd_iommu *iommu)
          +         {
          +             return iommu->cap & IOMMU_CAP_NPCACHE;
          +         }
          +         
- -------- ---------static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd)
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * IOMMU command queuing functions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Writes the command to the IOMMUs command buffer and informs the
+ ++++++++++++++++++ * hardware about the new command. Must be called with iommu->lock held.
+ ++++++++++++++++++ */
+ ++++++++++++++++++static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
          +         {
          +             u32 tail, head;
          +             u8 *target;
          +         
          +             tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
          +             target = (iommu->cmd_buf + tail);
          +             memcpy_toio(target, cmd, sizeof(*cmd));
          +             tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size;
          +             head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
          +             if (tail == head)
          +                     return -ENOMEM;
          +             writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
          +         
          +             return 0;
          +         }
          +         
- -------- ---------    struct command cmd;
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * General queuing function for commands. Takes iommu->lock and calls
+ ++++++++++++++++++ * __iommu_queue_command().
+ ++++++++++++++++++ */
+ ++++++++++++++++++static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
          +         {
          +             unsigned long flags;
          +             int ret;
          +         
          +             spin_lock_irqsave(&iommu->lock, flags);
          +             ret = __iommu_queue_command(iommu, cmd);
          +             spin_unlock_irqrestore(&iommu->lock, flags);
          +         
          +             return ret;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function is called whenever we need to ensure that the IOMMU has
+ ++++++++++++++++++ * completed execution of all commands we sent. It sends a
+ ++++++++++++++++++ * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
+ ++++++++++++++++++ * us about that by writing a value to a physical address we pass with
+ ++++++++++++++++++ * the command.
+ ++++++++++++++++++ */
          +         static int iommu_completion_wait(struct amd_iommu *iommu)
          +         {
          +             int ret;
- -------- ---------    cmd.data[1] = HIGH_U32(ready_phys);
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +             volatile u64 ready = 0;
          +             unsigned long ready_phys = virt_to_phys(&ready);
+ ++++++++++++++++++    unsigned long i = 0;
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK;
- -------- ---------    while (!ready)
+ ++++++++++++++++++    cmd.data[1] = upper_32_bits(ready_phys);
          +             cmd.data[2] = 1; /* value written to 'ready' */
          +             CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
          +         
          +             iommu->need_sync = 0;
          +         
          +             ret = iommu_queue_command(iommu, &cmd);
          +         
          +             if (ret)
          +                     return ret;
          +         
- -------- ---------    struct command cmd;
+ ++++++++++++++++++    while (!ready && (i < EXIT_LOOP_COUNT)) {
+ ++++++++++++++++++            ++i;
          +                     cpu_relax();
+ ++++++++++++++++++    }
+ ++++++++++++++++++
+ ++++++++++++++++++    if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit()))
+ ++++++++++++++++++            printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n");
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Command send function for invalidating a device table entry
+ ++++++++++++++++++ */
          +         static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
          +         {
- -------- ---------    struct command cmd;
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +         
          +             BUG_ON(iommu == NULL);
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY);
          +             cmd.data[0] = devid;
          +         
          +             iommu->need_sync = 1;
          +         
          +             return iommu_queue_command(iommu, &cmd);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Generic command send function for invalidaing TLB entries
+ ++++++++++++++++++ */
          +         static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
          +                     u64 address, u16 domid, int pde, int s)
          +         {
- -------- ---------    cmd.data[3] = HIGH_U32(address);
- -------- ---------    if (s)
+ ++++++++++++++++++    struct iommu_cmd cmd;
          +         
          +             memset(&cmd, 0, sizeof(cmd));
          +             address &= PAGE_MASK;
          +             CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
          +             cmd.data[1] |= domid;
          +             cmd.data[2] = LOW_U32(address);
- -------- ---------    if (pde)
+ ++++++++++++++++++    cmd.data[3] = upper_32_bits(address);
+ ++++++++++++++++++    if (s) /* size bit - we flush more than one 4kb page */
          +                     cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
- -------- ---------    _bdf = (pcidev->bus->number << 8) | pcidev->devfn;
+ ++++++++++++++++++    if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
          +                     cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
          +         
          +             iommu->need_sync = 1;
          +         
          +             return iommu_queue_command(iommu, &cmd);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * TLB invalidation function which is called from the mapping functions.
+ ++++++++++++++++++ * It invalidates a single PTE if the range to flush is within a single
+ ++++++++++++++++++ * page. Otherwise it flushes the whole TLB of the IOMMU.
+ ++++++++++++++++++ */
          +         static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
          +                     u64 address, size_t size)
          +         {
          +             int s = 0;
          +             unsigned pages = to_pages(address, size);
          +         
          +             address &= PAGE_MASK;
          +         
          +             if (pages > 1) {
          +                     /*
          +                      * If we have to flush more than one page, flush all
          +                      * TLB entries for this domain
          +                      */
          +                     address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
          +                     s = 1;
          +             }
          +         
          +             iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The functions below are used the create the page table mappings for
+ ++++++++++++++++++ * unity mapped regions.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Generic mapping functions. It maps a physical address into a DMA
+ ++++++++++++++++++ * address space. It allocates the page table pages if necessary.
+ ++++++++++++++++++ * In the future it can be extended to a generic mapping function
+ ++++++++++++++++++ * supporting all features of AMD IOMMU page tables like level skipping
+ ++++++++++++++++++ * and full 64 bit address spaces.
+ ++++++++++++++++++ */
          +         static int iommu_map(struct protection_domain *dom,
          +                          unsigned long bus_addr,
          +                          unsigned long phys_addr,
          +                          int prot)
          +         {
          +             u64 __pte, *pte, *page;
          +         
          +             bus_addr  = PAGE_ALIGN(bus_addr);
          +             phys_addr = PAGE_ALIGN(bus_addr);
          +         
          +             /* only support 512GB address spaces for now */
          +             if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
          +                     return -EINVAL;
          +         
          +             pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
          +         
          +             if (!IOMMU_PTE_PRESENT(*pte)) {
          +                     page = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!page)
          +                             return -ENOMEM;
          +                     *pte = IOMMU_L2_PDE(virt_to_phys(page));
          +             }
          +         
          +             pte = IOMMU_PTE_PAGE(*pte);
          +             pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
          +         
          +             if (!IOMMU_PTE_PRESENT(*pte)) {
          +                     page = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!page)
          +                             return -ENOMEM;
          +                     *pte = IOMMU_L1_PDE(virt_to_phys(page));
          +             }
          +         
          +             pte = IOMMU_PTE_PAGE(*pte);
          +             pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
          +         
          +             if (IOMMU_PTE_PRESENT(*pte))
          +                     return -EBUSY;
          +         
          +             __pte = phys_addr | IOMMU_PTE_P;
          +             if (prot & IOMMU_PROT_IR)
          +                     __pte |= IOMMU_PTE_IR;
          +             if (prot & IOMMU_PROT_IW)
          +                     __pte |= IOMMU_PTE_IW;
          +         
          +             *pte = __pte;
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function checks if a specific unity mapping entry is needed for
+ ++++++++++++++++++ * this specific IOMMU.
+ ++++++++++++++++++ */
          +         static int iommu_for_unity_map(struct amd_iommu *iommu,
          +                                    struct unity_map_entry *entry)
          +         {
          +             u16 bdf, i;
          +         
          +             for (i = entry->devid_start; i <= entry->devid_end; ++i) {
          +                     bdf = amd_iommu_alias_table[i];
          +                     if (amd_iommu_rlookup_table[bdf] == iommu)
          +                             return 1;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Init the unity mappings for a specific IOMMU in the system
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Basically iterates over all unity mapping entries and applies them to
+ ++++++++++++++++++ * the default domain DMA of that IOMMU if necessary.
+ ++++++++++++++++++ */
          +         static int iommu_init_unity_mappings(struct amd_iommu *iommu)
          +         {
          +             struct unity_map_entry *entry;
          +             int ret;
          +         
          +             list_for_each_entry(entry, &amd_iommu_unity_map, list) {
          +                     if (!iommu_for_unity_map(iommu, entry))
          +                             continue;
          +                     ret = dma_ops_unity_map(iommu->default_dom, entry);
          +                     if (ret)
          +                             return ret;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function actually applies the mapping to the page table of the
+ ++++++++++++++++++ * dma_ops domain.
+ ++++++++++++++++++ */
          +         static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
          +                                  struct unity_map_entry *e)
          +         {
          +             u64 addr;
          +             int ret;
          +         
          +             for (addr = e->address_start; addr < e->address_end;
          +                  addr += PAGE_SIZE) {
          +                     ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
          +                     if (ret)
          +                             return ret;
          +                     /*
          +                      * if unity mapping is in aperture range mark the page
          +                      * as allocated in the aperture
          +                      */
          +                     if (addr < dma_dom->aperture_size)
          +                             __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Inits the unity mappings required for a specific device
+ ++++++++++++++++++ */
          +         static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
          +                                               u16 devid)
          +         {
          +             struct unity_map_entry *e;
          +             int ret;
          +         
          +             list_for_each_entry(e, &amd_iommu_unity_map, list) {
          +                     if (!(devid >= e->devid_start && devid <= e->devid_end))
          +                             continue;
          +                     ret = dma_ops_unity_map(dma_dom, e);
          +                     if (ret)
          +                             return ret;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the address allocator for the dma_ops
+ ++++++++++++++++++ * interface functions. They work like the allocators in the other IOMMU
+ ++++++++++++++++++ * drivers. Its basically a bitmap which marks the allocated pages in
+ ++++++++++++++++++ * the aperture. Maybe it could be enhanced in the future to a more
+ ++++++++++++++++++ * efficient allocator.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
          +         static unsigned long dma_mask_to_pages(unsigned long mask)
          +         {
          +             return (mask >> PAGE_SHIFT) +
          +                     (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The address allocator core function.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * called with domain->lock held
+ ++++++++++++++++++ */
          +         static unsigned long dma_ops_alloc_addresses(struct device *dev,
          +                                                  struct dma_ops_domain *dom,
          +                                                  unsigned int pages)
          +         {
          +             unsigned long limit = dma_mask_to_pages(*dev->dma_mask);
          +             unsigned long address;
          +             unsigned long size = dom->aperture_size >> PAGE_SHIFT;
          +             unsigned long boundary_size;
          +         
          +             boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
          +                             PAGE_SIZE) >> PAGE_SHIFT;
          +             limit = limit < size ? limit : size;
          +         
          +             if (dom->next_bit >= limit)
          +                     dom->next_bit = 0;
          +         
          +             address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
          +                             0 , boundary_size, 0);
          +             if (address == -1)
          +                     address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
          +                                     0, boundary_size, 0);
          +         
          +             if (likely(address != -1)) {
          +                     dom->next_bit = address + pages;
          +                     address <<= PAGE_SHIFT;
          +             } else
          +                     address = bad_dma_address;
          +         
          +             WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
          +         
          +             return address;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The address free function.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * called with domain->lock held
+ ++++++++++++++++++ */
          +         static void dma_ops_free_addresses(struct dma_ops_domain *dom,
          +                                        unsigned long address,
          +                                        unsigned int pages)
          +         {
          +             address >>= PAGE_SHIFT;
          +             iommu_area_free(dom->bitmap, address, pages);
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the domain allocation. A domain is
+ ++++++++++++++++++ * allocated for every IOMMU as the default domain. If device isolation
+ ++++++++++++++++++ * is enabled, every device get its own domain. The most important thing
+ ++++++++++++++++++ * about domains is the page table mapping the DMA address space they
+ ++++++++++++++++++ * contain.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static u16 domain_id_alloc(void)
          +         {
          +             unsigned long flags;
          +             int id;
          +         
          +             write_lock_irqsave(&amd_iommu_devtable_lock, flags);
          +             id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
          +             BUG_ON(id == 0);
          +             if (id > 0 && id < MAX_DOMAIN_ID)
          +                     __set_bit(id, amd_iommu_pd_alloc_bitmap);
          +             else
          +                     id = 0;
          +             write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
          +         
          +             return id;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Used to reserve address ranges in the aperture (e.g. for exclusion
+ ++++++++++++++++++ * ranges.
+ ++++++++++++++++++ */
          +         static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
          +                                           unsigned long start_page,
          +                                           unsigned int pages)
          +         {
          +             unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
          +         
          +             if (start_page + pages > last_page)
          +                     pages = last_page - start_page;
          +         
          +             set_bit_string(dom->bitmap, start_page, pages);
          +         }
          +         
          +         static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
          +         {
          +             int i, j;
          +             u64 *p1, *p2, *p3;
          +         
          +             p1 = dma_dom->domain.pt_root;
          +         
          +             if (!p1)
          +                     return;
          +         
          +             for (i = 0; i < 512; ++i) {
          +                     if (!IOMMU_PTE_PRESENT(p1[i]))
          +                             continue;
          +         
          +                     p2 = IOMMU_PTE_PAGE(p1[i]);
          +                     for (j = 0; j < 512; ++i) {
          +                             if (!IOMMU_PTE_PRESENT(p2[j]))
          +                                     continue;
          +                             p3 = IOMMU_PTE_PAGE(p2[j]);
          +                             free_page((unsigned long)p3);
          +                     }
          +         
          +                     free_page((unsigned long)p2);
          +             }
          +         
          +             free_page((unsigned long)p1);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Free a domain, only used if something went wrong in the
+ ++++++++++++++++++ * allocation path and we need to free an already allocated page table
+ ++++++++++++++++++ */
          +         static void dma_ops_domain_free(struct dma_ops_domain *dom)
          +         {
          +             if (!dom)
          +                     return;
          +         
          +             dma_ops_free_pagetable(dom);
          +         
          +             kfree(dom->pte_pages);
          +         
          +             kfree(dom->bitmap);
          +         
          +             kfree(dom);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Allocates a new protection domain usable for the dma_ops functions.
+ ++++++++++++++++++ * It also initializes the page table and the address allocator data
+ ++++++++++++++++++ * structures required for the dma_ops interface.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * @iommu: IOMMU whose exclusion range, if set, is reserved in the domain
+ ++++++++++++++++++ * @order: log2 of the aperture size in bytes (accepted range: 25..30)
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Returns the new domain, or NULL when @order is out of range, when
+ ++++++++++++++++++ * domain_id_alloc() returns 0 or when a memory allocation fails. After
+ ++++++++++++++++++ * the initial kzalloc every failure path hands the partially built
+ ++++++++++++++++++ * domain to dma_ops_domain_free().
+ ++++++++++++++++++ */
          +         static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
          +                                                        unsigned order)
          +         {
          +             struct dma_ops_domain *dma_dom;
          +             unsigned i, num_pte_pages;
          +             u64 *l2_pde;
          +             u64 address;
          +         
          +             /*
          +              * Currently the DMA aperture must be between 32 MB and 1GB in size
          +              */
          +             if ((order < 25) || (order > 30))
          +                     return NULL;
          +         
          +             dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
          +             if (!dma_dom)
          +                     return NULL;
          +         
          +             spin_lock_init(&dma_dom->domain.lock);
          +         
          +             dma_dom->domain.id = domain_id_alloc();
          +             if (dma_dom->domain.id == 0)
          +                     goto free_dma_dom;
          +             dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
          +             dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+ ++++++++++++++++++    /* back-pointer used by the dma_ops entry points (domain->priv) */
          +             dma_dom->domain.priv = dma_dom;
          +             if (!dma_dom->domain.pt_root)
          +                     goto free_dma_dom;
          +             dma_dom->aperture_size = (1ULL << order);
+ ++++++++++++++++++    /* allocation bitmap: one bit per PAGE_SIZE page of the aperture */
          +             dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
          +                                       GFP_KERNEL);
          +             if (!dma_dom->bitmap)
          +                     goto free_dma_dom;
          +             /*
          +              * mark the first page as allocated so we never return 0 as
          +              * a valid dma-address. So we can use 0 as error value
          +              */
          +             dma_dom->bitmap[0] = 1;
          +             dma_dom->next_bit = 0;
          +         
+ ++++++++++++++++++    /* Initialize the exclusion range if necessary */
          +             if (iommu->exclusion_start &&
          +                 iommu->exclusion_start < dma_dom->aperture_size) {
          +                     unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
          +                     int pages = to_pages(iommu->exclusion_start,
          +                                     iommu->exclusion_length);
          +                     dma_ops_reserve_addresses(dma_dom, startpage, pages);
          +             }
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * At the last step, build the page tables so we don't need to
+ ++++++++++++++++++     * allocate page table pages in the dma_ops mapping/unmapping
+ ++++++++++++++++++     * path. One pte page is allocated per 512 pages of aperture.
+ ++++++++++++++++++     */
          +             num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
          +             dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
          +                             GFP_KERNEL);
          +             if (!dma_dom->pte_pages)
          +                     goto free_dma_dom;
          +         
          +             l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
          +             if (l2_pde == NULL)
          +                     goto free_dma_dom;
          +         
+ ++++++++++++++++++    /* only slot 0 of the root table is populated; it points at l2_pde */
          +             dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
          +         
+ ++++++++++++++++++    /* allocate every pte page up front and link it into l2_pde[i] */
          +             for (i = 0; i < num_pte_pages; ++i) {
          +                     dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
          +                     if (!dma_dom->pte_pages[i])
          +                             goto free_dma_dom;
          +                     address = virt_to_phys(dma_dom->pte_pages[i]);
          +                     l2_pde[i] = IOMMU_L1_PDE(address);
          +             }
          +         
          +             return dma_dom;
          +         
          +         free_dma_dom:
          +             dma_ops_domain_free(dma_dom);
          +         
          +             return NULL;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Return the protection domain recorded for a device id in
+ ++++++++++++++++++ * amd_iommu_pd_table. The table is read with amd_iommu_devtable_lock
+ ++++++++++++++++++ * held for reading and interrupts disabled. The returned structure
+ ++++++++++++++++++ * gives access to, for example, the page table root of the device.
+ ++++++++++++++++++ */
          +         static struct protection_domain *domain_for_device(u16 devid)
          +         {
          +             unsigned long irq_flags;
          +             struct protection_domain *pd;
          +         
          +             read_lock_irqsave(&amd_iommu_devtable_lock, irq_flags);
          +             pd = amd_iommu_pd_table[devid];
          +             read_unlock_irqrestore(&amd_iommu_devtable_lock, irq_flags);
          +         
          +             return pd;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Assign a device to a protection domain and make that assignment
+ ++++++++++++++++++ * visible to the hardware: the device table entry and the
+ ++++++++++++++++++ * amd_iommu_pd_table slot for @devid are rewritten under
+ ++++++++++++++++++ * amd_iommu_devtable_lock, then an invalidation of the device entry is
+ ++++++++++++++++++ * queued and the IOMMU is flagged as needing a sync.
+ ++++++++++++++++++ */
          +         static void set_device_domain(struct amd_iommu *iommu,
          +                                   struct protection_domain *domain,
          +                                   u16 devid)
          +         {
          +             unsigned long irq_flags;
          +             u64 dte;
          +         
+ ++++++++++++++++++    /* page table root address, paging mode and permission bits */
          +             dte  = virt_to_phys(domain->pt_root);
          +             dte |= (domain->mode & 0x07) << 9;
          +             dte |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | 2;
          +         
          +             write_lock_irqsave(&amd_iommu_devtable_lock, irq_flags);
          +         
          +             amd_iommu_dev_table[devid].data[0] = dte;
          +             amd_iommu_dev_table[devid].data[1] = dte >> 32;
          +             amd_iommu_dev_table[devid].data[2] = domain->id;
          +             amd_iommu_pd_table[devid] = domain;
          +         
          +             write_unlock_irqrestore(&amd_iommu_devtable_lock, irq_flags);
          +         
          +             iommu_queue_inv_dev_entry(iommu, devid);
          +             iommu->need_sync = 1;
          +         }
          +         
+ ++++++++++++++++++/*****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the dma_ops mapping/unmapping code.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * In the dma_ops path we only have the struct device. This function
+ ++++++++++++++++++ * finds the corresponding IOMMU, the protection domain and the
+ ++++++++++++++++++ * requestor id for a given device.
+ ++++++++++++++++++ * If the device is not yet associated with a domain this is also done
+ ++++++++++++++++++ * in this function (the default domain of the IOMMU is used).
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * @dev:    device to look up; must be a PCI device with a dma_mask
+ ++++++++++++++++++ * @iommu:  out: responsible IOMMU, or NULL if there is none
+ ++++++++++++++++++ * @domain: out: protection domain of the device, or NULL if there is none
+ ++++++++++++++++++ * @bdf:    out: alias-resolved device id, or 0xffff if the device id is
+ ++++++++++++++++++ *          outside the range covered by the IOMMU tables
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Returns 1 if the device is handled by an IOMMU, 0 otherwise. All
+ ++++++++++++++++++ * out-parameters hold a defined value on every return path, so callers
+ ++++++++++++++++++ * that ignore the return value can safely test *iommu and *domain.
+ ++++++++++++++++++ */
          +         static int get_device_resources(struct device *dev,
          +                                     struct amd_iommu **iommu,
          +                                     struct protection_domain **domain,
          +                                     u16 *bdf)
          +         {
          +             struct dma_ops_domain *dma_dom;
          +             struct pci_dev *pcidev;
          +             u16 _bdf;
          +         
          +             BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask);
          +         
+ ++++++++++++++++++    /* defaults for the "not handled by any IOMMU" case */
          +             *iommu = NULL;
          +             *domain = NULL;
          +             *bdf = 0xffff;
          +         
          +             pcidev = to_pci_dev(dev);
+ ++++++++++++++++++    _bdf = calc_devid(pcidev->bus->number, pcidev->devfn);
          +         
+ ++++++++++++++++++    /* device not translated by any IOMMU in the system? */
          +             if (_bdf >= amd_iommu_last_bdf)
          +                     return 0;
          +         
          +             *bdf = amd_iommu_alias_table[_bdf];
          +         
          +             *iommu = amd_iommu_rlookup_table[*bdf];
          +             if (*iommu == NULL)
          +                     return 0;
          +         
          +             dma_dom = (*iommu)->default_dom;
          +             *domain = domain_for_device(*bdf);
          +             if (*domain == NULL) {
+ ++++++++++++++++++            /* first use of this device: attach it to the default domain */
          +                     *domain = &dma_dom->domain;
          +                     set_device_domain(*iommu, *domain, *bdf);
          +                     printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
          +                                     "device ", (*domain)->id);
          +                     print_devid(_bdf, 1);
          +             }
          +         
          +             return 1;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This is the generic map function. It maps one 4kb page at paddr to
+ ++++++++++++++++++ * the given address in the DMA address space for the domain.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * @iommu:     not used by this function
+ ++++++++++++++++++ * @dom:       domain whose pre-allocated pte pages are written
+ ++++++++++++++++++ * @address:   DMA address inside the aperture to map at
+ ++++++++++++++++++ * @paddr:     physical address; rounded down to a page boundary
+ ++++++++++++++++++ * @direction: DMA_TO_DEVICE grants read, DMA_FROM_DEVICE grants write,
+ ++++++++++++++++++ *             DMA_BIDIRECTIONAL grants both; any other value grants
+ ++++++++++++++++++ *             neither
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Returns @address converted to dma_addr_t.
+ ++++++++++++++++++ */
          +         static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
          +                                          struct dma_ops_domain *dom,
          +                                          unsigned long address,
          +                                          phys_addr_t paddr,
          +                                          int direction)
          +         {
          +             u64 *pte, __pte;
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * The aperture covers [0, aperture_size), so an address equal to
+ ++++++++++++++++++     * aperture_size is already out of range.
+ ++++++++++++++++++     */
          +             WARN_ON(address >= dom->aperture_size);
          +         
          +             paddr &= PAGE_MASK;
          +         
          +             pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
          +             pte += IOMMU_PTE_L0_INDEX(address);
          +         
          +             __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
          +         
          +             if (direction == DMA_TO_DEVICE)
          +                     __pte |= IOMMU_PTE_IR;
          +             else if (direction == DMA_FROM_DEVICE)
          +                     __pte |= IOMMU_PTE_IW;
          +             else if (direction == DMA_BIDIRECTIONAL)
          +                     __pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
          +         
+ ++++++++++++++++++    /* the slot is expected to be empty; warn on a double mapping */
          +             WARN_ON(*pte);
          +         
          +             *pte = __pte;
          +         
          +             return (dma_addr_t)address;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The generic unmapping function for one page in the DMA address space.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * @iommu:   not used by this function
+ ++++++++++++++++++ * @dom:     domain whose pte is cleared
+ ++++++++++++++++++ * @address: DMA address of the page; expected to be page aligned
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Addresses at or beyond the end of the aperture are ignored.
+ ++++++++++++++++++ */
          +         static void dma_ops_domain_unmap(struct amd_iommu *iommu,
          +                                      struct dma_ops_domain *dom,
          +                                      unsigned long address)
          +         {
          +             u64 *pte;
          +         
          +             if (address >= dom->aperture_size)
          +                     return;
          +         
+ ++++++++++++++++++    /* the range was checked above, only the alignment is left to verify */
          +             WARN_ON(address & 0xfffULL);
          +         
          +             pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
          +             pte += IOMMU_PTE_L0_INDEX(address);
          +         
+ ++++++++++++++++++    /* warn when asked to unmap a page that was not mapped */
          +             WARN_ON(!*pte);
          +         
          +             *pte = 0ULL;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Common code for mapping a physically contiguous memory region into
+ ++++++++++++++++++ * the DMA address space. It is used by all mapping functions provided
+ ++++++++++++++++++ * by this IOMMU driver.
+ ++++++++++++++++++ * Must be called with the domain lock held.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Returns the DMA address corresponding to @paddr (including its
+ ++++++++++++++++++ * offset into the first page), or bad_dma_address if no address range
+ ++++++++++++++++++ * could be allocated.
+ ++++++++++++++++++ */
          +         static dma_addr_t __map_single(struct device *dev,
          +                                    struct amd_iommu *iommu,
          +                                    struct dma_ops_domain *dma_dom,
          +                                    phys_addr_t paddr,
          +                                    size_t size,
          +                                    int dir)
          +         {
          +             dma_addr_t page_off = paddr & ~PAGE_MASK;
          +             unsigned int npages = to_pages(paddr, size);
          +             dma_addr_t base, cur;
          +             unsigned int n;
          +         
          +             paddr &= PAGE_MASK;
          +         
          +             base = dma_ops_alloc_addresses(dev, dma_dom, npages);
          +             if (unlikely(base == bad_dma_address))
          +                     return base;
          +         
+ ++++++++++++++++++    /* map page by page, advancing both address spaces in lock step */
          +             cur = base;
          +             for (n = 0; n < npages; ++n) {
          +                     dma_ops_domain_map(iommu, dma_dom, cur, paddr, dir);
          +                     paddr += PAGE_SIZE;
          +                     cur += PAGE_SIZE;
          +             }
          +         
          +             return base + page_off;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Reverse of __map_single: clears the ptes covering the region and
+ ++++++++++++++++++ * returns its address range to the allocator. Must be called with the
+ ++++++++++++++++++ * domain lock held too.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * A zero @dma_addr or a region reaching past the end of the aperture is
+ ++++++++++++++++++ * ignored. @dir is currently unused.
+ ++++++++++++++++++ */
          +         static void __unmap_single(struct amd_iommu *iommu,
          +                                struct dma_ops_domain *dma_dom,
          +                                dma_addr_t dma_addr,
          +                                size_t size,
          +                                int dir)
          +         {
          +             unsigned int npages, n;
          +             dma_addr_t cur;
          +         
          +             if (dma_addr == 0)
          +                     return;
          +             if (dma_addr + size > dma_dom->aperture_size)
          +                     return;
          +         
          +             npages = to_pages(dma_addr, size);
          +             dma_addr &= PAGE_MASK;
          +         
          +             cur = dma_addr;
          +             for (n = 0; n < npages; ++n, cur += PAGE_SIZE)
          +                     dma_ops_domain_unmap(iommu, dma_dom, cur);
          +         
          +             dma_ops_free_addresses(dma_dom, dma_addr, npages);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported map_single function for dma_ops.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Devices without an IOMMU or protection domain get @paddr back
+ ++++++++++++++++++ * unchanged. Otherwise the region is mapped under the domain lock and
+ ++++++++++++++++++ * the resulting DMA address (or bad_dma_address on failure) is returned.
+ ++++++++++++++++++ */
          +         static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
          +                                  size_t size, int dir)
          +         {
          +             struct protection_domain *domain;
          +             struct amd_iommu *iommu;
          +             unsigned long irq_flags;
          +             dma_addr_t dma;
          +             u16 devid;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
+ ++++++++++++++++++    /* device not handled by any AMD IOMMU */
          +             if (!iommu || !domain)
          +                     return (dma_addr_t)paddr;
          +         
          +             spin_lock_irqsave(&domain->lock, irq_flags);
          +         
          +             dma = __map_single(dev, iommu, domain->priv, paddr, size, dir);
          +             if (dma != bad_dma_address) {
          +                     if (iommu_has_npcache(iommu))
          +                             iommu_flush_pages(iommu, domain->id, dma, size);
          +         
          +                     if (iommu->need_sync)
          +                             iommu_completion_wait(iommu);
          +             }
          +         
          +             spin_unlock_irqrestore(&domain->lock, irq_flags);
          +         
          +             return dma;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported unmap_single function for dma_ops.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Does nothing for devices that get_device_resources() reports as not
+ ++++++++++++++++++ * handled. Otherwise the region is unmapped and flushed under the
+ ++++++++++++++++++ * domain lock.
+ ++++++++++++++++++ */
          +         static void unmap_single(struct device *dev, dma_addr_t dma_addr,
          +                              size_t size, int dir)
          +         {
          +             struct protection_domain *domain;
          +             struct amd_iommu *iommu;
          +             unsigned long irq_flags;
          +             u16 devid;
          +         
+ ++++++++++++++++++    /* device not handled by any AMD IOMMU */
          +             if (get_device_resources(dev, &iommu, &domain, &devid) == 0)
          +                     return;
          +         
          +             spin_lock_irqsave(&domain->lock, irq_flags);
          +         
          +             __unmap_single(iommu, domain->priv, dma_addr, size, dir);
          +             iommu_flush_pages(iommu, domain->id, dma_addr, size);
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +             spin_unlock_irqrestore(&domain->lock, irq_flags);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Special map_sg variant for devices which are not handled by an AMD
+ ++++++++++++++++++ * IOMMU in the system: every entry is given its physical address as
+ ++++++++++++++++++ * DMA address and its full length as DMA length.
+ ++++++++++++++++++ * Always returns @nelems.
+ ++++++++++++++++++ */
          +         static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist,
          +                                int nelems, int dir)
          +         {
          +             struct scatterlist *sg;
          +             int idx;
          +         
          +             for_each_sg(sglist, sg, nelems, idx) {
          +                     sg->dma_address = (dma_addr_t)sg_phys(sg);
          +                     sg->dma_length  = sg->length;
          +             }
          +         
          +             return nelems;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported map_sg function for dma_ops (handles scatter-gather
+ ++++++++++++++++++ * lists).
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Returns the number of mapped entries. If one entry cannot be mapped,
+ ++++++++++++++++++ * all entries mapped so far are unmapped again and 0 is returned.
+ ++++++++++++++++++ */
          +         static int map_sg(struct device *dev, struct scatterlist *sglist,
          +                       int nelems, int dir)
          +         {
          +             struct protection_domain *domain;
          +             struct amd_iommu *iommu;
          +             struct scatterlist *sg;
          +             unsigned long irq_flags;
          +             phys_addr_t phys;
          +             int nmapped = 0;
          +             u16 devid;
          +             int idx;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (!iommu || !domain)
          +                     return map_sg_no_iommu(dev, sglist, nelems, dir);
          +         
          +             spin_lock_irqsave(&domain->lock, irq_flags);
          +         
          +             for_each_sg(sglist, sg, nelems, idx) {
          +                     phys = sg_phys(sg);
          +         
          +                     sg->dma_address = __map_single(dev, iommu, domain->priv,
          +                                                    phys, sg->length, dir);
+ ++++++++++++++++++            /* a zero DMA address signals a failed mapping */
          +                     if (!sg->dma_address)
          +                             goto unmap;
          +         
          +                     sg->dma_length = sg->length;
          +                     nmapped++;
          +         
          +                     if (iommu_has_npcache(iommu))
          +                             iommu_flush_pages(iommu, domain->id, sg->dma_address,
          +                                               sg->dma_length);
          +             }
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +         out:
          +             spin_unlock_irqrestore(&domain->lock, irq_flags);
          +         
          +             return nmapped;
          +         
          +         unmap:
+ ++++++++++++++++++    /* roll back the entries that were mapped before the failure */
          +             for_each_sg(sglist, sg, nmapped, idx) {
          +                     if (sg->dma_address)
          +                             __unmap_single(iommu, domain->priv, sg->dma_address,
          +                                            sg->dma_length, dir);
          +                     sg->dma_address = sg->dma_length = 0;
          +             }
          +         
          +             nmapped = 0;
          +         
          +             goto out;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported unmap_sg function for dma_ops (handles scatter-gather
+ ++++++++++++++++++ * lists).
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Each entry is unmapped and flushed, then its DMA address and length
+ ++++++++++++++++++ * are reset to 0. Does nothing for devices that get_device_resources()
+ ++++++++++++++++++ * reports as not handled.
+ ++++++++++++++++++ */
          +         static void unmap_sg(struct device *dev, struct scatterlist *sglist,
          +                          int nelems, int dir)
          +         {
          +             struct protection_domain *domain;
          +             struct amd_iommu *iommu;
          +             struct scatterlist *sg;
          +             unsigned long irq_flags;
          +             u16 devid;
          +             int idx;
          +         
          +             if (get_device_resources(dev, &iommu, &domain, &devid) == 0)
          +                     return;
          +         
          +             spin_lock_irqsave(&domain->lock, irq_flags);
          +         
          +             for_each_sg(sglist, sg, nelems, idx) {
          +                     __unmap_single(iommu, domain->priv, sg->dma_address,
          +                                    sg->dma_length, dir);
          +                     iommu_flush_pages(iommu, domain->id, sg->dma_address,
          +                                       sg->dma_length);
          +                     sg->dma_address = sg->dma_length = 0;
          +             }
          +         
          +             if (iommu->need_sync)
          +                     iommu_completion_wait(iommu);
          +         
          +             spin_unlock_irqrestore(&domain->lock, irq_flags);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported alloc_coherent function for dma_ops.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Allocates pages for @size bytes, zeroes them and maps them
+ ++++++++++++++++++ * bidirectionally. For devices without an IOMMU or protection domain
+ ++++++++++++++++++ * the physical address is stored in *@dma_addr instead.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Returns the kernel virtual address of the buffer, or NULL if either
+ ++++++++++++++++++ * the page allocation or the mapping failed.
+ ++++++++++++++++++ */
          +         static void *alloc_coherent(struct device *dev, size_t size,
          +                                 dma_addr_t *dma_addr, gfp_t flag)
          +         {
          +             struct protection_domain *domain;
          +             struct amd_iommu *iommu;
          +             unsigned long irq_flags;
          +             phys_addr_t phys;
          +             void *buf;
          +             u16 devid;
          +         
          +             buf = (void *)__get_free_pages(flag, get_order(size));
          +             if (!buf)
          +                     return NULL;
          +         
          +             memset(buf, 0, size);
          +             phys = virt_to_phys(buf);
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (!iommu || !domain) {
          +                     *dma_addr = (dma_addr_t)phys;
          +                     return buf;
          +             }
          +         
          +             spin_lock_irqsave(&domain->lock, irq_flags);
          +         
          +             *dma_addr = __map_single(dev, iommu, domain->priv, phys,
          +                                      size, DMA_BIDIRECTIONAL);
          +         
          +             if (*dma_addr == bad_dma_address) {
+ ++++++++++++++++++            /* mapping failed: give the pages back and report failure */
          +                     free_pages((unsigned long)buf, get_order(size));
          +                     buf = NULL;
          +             } else {
          +                     if (iommu_has_npcache(iommu))
          +                             iommu_flush_pages(iommu, domain->id, *dma_addr, size);
          +         
          +                     if (iommu->need_sync)
          +                             iommu_completion_wait(iommu);
          +             }
          +         
          +             spin_unlock_irqrestore(&domain->lock, irq_flags);
          +         
          +             return buf;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The exported free_coherent function for dma_ops.
+ ++++++++++++++++++ * FIXME: fix the generic x86 DMA layer so that it actually calls that
+ ++++++++++++++++++ *        function.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Unmaps and flushes the region if the device has an IOMMU and a
+ ++++++++++++++++++ * protection domain, then frees the pages in every case.
+ ++++++++++++++++++ */
          +         static void free_coherent(struct device *dev, size_t size,
          +                               void *virt_addr, dma_addr_t dma_addr)
          +         {
          +             struct protection_domain *domain;
          +             struct amd_iommu *iommu;
          +             unsigned long irq_flags;
          +             u16 devid;
          +         
          +             get_device_resources(dev, &iommu, &domain, &devid);
          +         
          +             if (iommu && domain) {
          +                     spin_lock_irqsave(&domain->lock, irq_flags);
          +         
          +                     __unmap_single(iommu, domain->priv, dma_addr, size,
          +                                    DMA_BIDIRECTIONAL);
          +                     iommu_flush_pages(iommu, domain->id, dma_addr, size);
          +         
          +                     if (iommu->need_sync)
          +                             iommu_completion_wait(iommu);
          +         
          +                     spin_unlock_irqrestore(&domain->lock, irq_flags);
          +             }
          +         
          +             free_pages((unsigned long)virt_addr, get_order(size));
          +         }
          +         
          +         /*
+ ++++++++++++++++++ * The function for pre-allocating protection domains.
+ ++++++++++++++++++ *
          +          * If the driver core informs the DMA layer if a driver grabs a device
          +          * we don't need to preallocate the protection domains anymore.
          +          * For now we have to.
          +          */
          +         void prealloc_protection_domains(void)
          +         {
          +             struct pci_dev *dev = NULL;
          +             struct dma_ops_domain *dma_dom;
          +             struct amd_iommu *iommu;
          +             int order = amd_iommu_aperture_order;
          +             u16 devid;
          +         
          +             while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
          +                     devid = (dev->bus->number << 8) | dev->devfn;
          +                     if (devid >= amd_iommu_last_bdf)
          +                             continue;
          +                     devid = amd_iommu_alias_table[devid];
          +                     if (domain_for_device(devid))
          +                             continue;
          +                     iommu = amd_iommu_rlookup_table[devid];
          +                     if (!iommu)
          +                             continue;
          +                     dma_dom = dma_ops_domain_alloc(iommu, order);
          +                     if (!dma_dom)
          +                             continue;
          +                     init_unity_mappings_for_device(dma_dom, devid);
          +                     set_device_domain(iommu, &dma_dom->domain, devid);
          +                     printk(KERN_INFO "AMD IOMMU: Allocated domain %d for device ",
          +                            dma_dom->domain.id);
          +                     print_devid(devid, 1);
          +             }
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * dma_mapping_ops table wiring the entry points of this driver.
+ ++++++++++++++++++ */
          +         static struct dma_mapping_ops amd_iommu_dma_ops = {
          +             .map_single     = map_single,
          +             .unmap_single   = unmap_single,
          +             .map_sg         = map_sg,
          +             .unmap_sg       = unmap_sg,
          +             .alloc_coherent = alloc_coherent,
          +             .free_coherent  = free_coherent,
          +         };
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The function which glues the AMD IOMMU driver into dma_ops.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Returns 0 on success. On failure the default domains allocated so
+ ++++++++++++++++++ * far are freed again and a negative error code is returned: -ENOMEM
+ ++++++++++++++++++ * if a default domain could not be allocated, otherwise the error
+ ++++++++++++++++++ * returned by iommu_init_unity_mappings().
+ ++++++++++++++++++ */
          +         int __init amd_iommu_init_dma_ops(void)
          +         {
          +             struct amd_iommu *iommu;
          +             int order = amd_iommu_aperture_order;
          +             int ret;
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * first allocate a default protection domain for every IOMMU we
+ ++++++++++++++++++     * found in the system. Devices not assigned to any other
+ ++++++++++++++++++     * protection domain will be assigned to the default one.
+ ++++++++++++++++++     */
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     iommu->default_dom = dma_ops_domain_alloc(iommu, order);
          +                     if (iommu->default_dom == NULL) {
+ ++++++++++++++++++                    /* release the domains of the IOMMUs handled so far */
          +                             ret = -ENOMEM;
          +                             goto free_domains;
          +                     }
          +                     ret = iommu_init_unity_mappings(iommu);
          +                     if (ret)
          +                             goto free_domains;
          +             }
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * If device isolation is enabled, pre-allocate the protection
+ ++++++++++++++++++     * domains for each device.
+ ++++++++++++++++++     */
          +             if (amd_iommu_isolate)
          +                     prealloc_protection_domains();
          +         
          +             iommu_detected = 1;
          +             force_iommu = 1;
          +             bad_dma_address = 0;
          +         #ifdef CONFIG_GART_IOMMU
          +             gart_iommu_aperture_disabled = 1;
          +             gart_iommu_aperture = 0;
          +         #endif
          +         
+ ++++++++++++++++++    /* Make the driver finally visible to the drivers */
          +             dma_ops = &amd_iommu_dma_ops;
          +         
          +             return 0;
          +         
          +         free_domains:
          +         
+ ++++++++++++++++++    /*
+ ++++++++++++++++++     * NOTE(review): relies on default_dom being NULL for IOMMUs the
+ ++++++++++++++++++     * loop above has not reached yet - confirm against the allocation
+ ++++++++++++++++++     * of struct amd_iommu.
+ ++++++++++++++++++     */
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     if (iommu->default_dom) {
          +                             dma_ops_domain_free(iommu->default_dom);
+ ++++++++++++++++++                    /* do not leave a dangling pointer behind */
          +                             iommu->default_dom = NULL;
          +                     }
          +             }
          +         
          +             return ret;
          +         }
index 2a13e43,7661b02,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,6643828,2a13e43,2a13e43,0000000,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43,2a13e43..c9d8ff2
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
------- -- ---------#include <asm/gart.h>
          +         /*
          +          * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
          +          * Author: Joerg Roedel <joerg.roedel@amd.com>
          +          *         Leo Duran <leo.duran@amd.com>
          +          *
          +          * This program is free software; you can redistribute it and/or modify it
          +          * under the terms of the GNU General Public License version 2 as published
          +          * by the Free Software Foundation.
          +          *
          +          * This program is distributed in the hope that it will be useful,
          +          * but WITHOUT ANY WARRANTY; without even the implied warranty of
          +          * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
          +          * GNU General Public License for more details.
          +          *
          +          * You should have received a copy of the GNU General Public License
          +          * along with this program; if not, write to the Free Software
          +          * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
          +          */
          +         
          +         #include <linux/pci.h>
          +         #include <linux/acpi.h>
          +         #include <linux/gfp.h>
          +         #include <linux/list.h>
          +         #include <linux/sysdev.h>
          +         #include <asm/pci-direct.h>
          +         #include <asm/amd_iommu_types.h>
          +         #include <asm/amd_iommu.h>
- -------- ---------#define UPDATE_LAST_BDF(x) do {\
- -------- ---------    if ((x) > amd_iommu_last_bdf) \
- -------- ---------            amd_iommu_last_bdf = (x); \
- -------- ---------    } while (0);
- -------- ---------
- -------- ---------#define DEVID(bus, devfn) (((bus) << 8) | (devfn))
+++++++ ++++++++++++#include <asm/iommu.h>
          +         
          +         /*
          +          * definitions for the ACPI scanning code
          +          */
- -------- ---------#define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x))))
          +         #define PCI_BUS(x) (((x) >> 8) & 0xff)
          +         #define IVRS_HEADER_LENGTH 48
- -------- ---------u16 amd_iommu_last_bdf;
- -------- ---------struct list_head amd_iommu_unity_map;
- -------- ---------unsigned amd_iommu_aperture_order = 26;
- -------- ---------int amd_iommu_isolate;
          +         
          +         #define ACPI_IVHD_TYPE                  0x10
          +         #define ACPI_IVMD_TYPE_ALL              0x20
          +         #define ACPI_IVMD_TYPE                  0x21
          +         #define ACPI_IVMD_TYPE_RANGE            0x22
          +         
          +         #define IVHD_DEV_ALL                    0x01
          +         #define IVHD_DEV_SELECT                 0x02
          +         #define IVHD_DEV_SELECT_RANGE_START     0x03
          +         #define IVHD_DEV_RANGE_END              0x04
          +         #define IVHD_DEV_ALIAS                  0x42
          +         #define IVHD_DEV_ALIAS_RANGE            0x43
          +         #define IVHD_DEV_EXT_SELECT             0x46
          +         #define IVHD_DEV_EXT_SELECT_RANGE       0x47
          +         
          +         #define IVHD_FLAG_HT_TUN_EN             0x00
          +         #define IVHD_FLAG_PASSPW_EN             0x01
          +         #define IVHD_FLAG_RESPASSPW_EN          0x02
          +         #define IVHD_FLAG_ISOC_EN               0x03
          +         
          +         #define IVMD_FLAG_EXCL_RANGE            0x08
          +         #define IVMD_FLAG_UNITY_MAP             0x01
          +         
          +         #define ACPI_DEVFLAG_INITPASS           0x01
          +         #define ACPI_DEVFLAG_EXTINT             0x02
          +         #define ACPI_DEVFLAG_NMI                0x04
          +         #define ACPI_DEVFLAG_SYSMGT1            0x10
          +         #define ACPI_DEVFLAG_SYSMGT2            0x20
          +         #define ACPI_DEVFLAG_LINT0              0x40
          +         #define ACPI_DEVFLAG_LINT1              0x80
          +         #define ACPI_DEVFLAG_ATSDIS             0x10000000
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * ACPI table definitions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * These data structures are laid over the table to parse the important values
+ ++++++++++++++++++ * out of it.
+ ++++++++++++++++++ */
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Structure describing one IOMMU in the ACPI table. Typically followed by one
+ ++++++++++++++++++ * or more ivhd_entry structures.
+ ++++++++++++++++++ */
          +         struct ivhd_header {
          +             u8 type;
          +             u8 flags;
          +             u16 length;
          +             u16 devid;
          +             u16 cap_ptr;
          +             u64 mmio_phys;
          +             u16 pci_seg;
          +             u16 info;
          +             u32 reserved;
          +         } __attribute__((packed));
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * A device entry describing which devices a specific IOMMU translates and
+ ++++++++++++++++++ * which requestor ids they use.
+ ++++++++++++++++++ */
          +         struct ivhd_entry {
          +             u8 type;
          +             u16 devid;
          +             u8 flags;
          +             u32 ext;
          +         } __attribute__((packed));
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * An AMD IOMMU memory definition structure. It defines things like exclusion
+ ++++++++++++++++++ * ranges for devices and regions that should be unity mapped.
+ ++++++++++++++++++ */
          +         struct ivmd_header {
          +             u8 type;
          +             u8 flags;
          +             u16 length;
          +             u16 devid;
          +             u16 aux;
          +             u64 resv;
          +             u64 range_start;
          +             u64 range_length;
          +         } __attribute__((packed));
          +         
          +         static int __initdata amd_iommu_detected;
          +         
- -------- ---------struct list_head amd_iommu_list;
+ ++++++++++++++++++u16 amd_iommu_last_bdf;                     /* largest PCI device id we have
+ ++++++++++++++++++                                       to handle */
+ ++++++++++++++++++LIST_HEAD(amd_iommu_unity_map);             /* a list of required unity mappings
+ ++++++++++++++++++                                       we find in ACPI */
+ ++++++++++++++++++unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+ ++++++++++++++++++int amd_iommu_isolate;                      /* if 1, device isolation is enabled */
+ ++++++++++++++++++
+ ++++++++++++++++++LIST_HEAD(amd_iommu_list);          /* list of all AMD IOMMUs in the
+ ++++++++++++++++++                                       system */
          +         
- -------- ---------static u32 dev_table_size;
- -------- ---------static u32 alias_table_size;
- -------- ---------static u32 rlookup_table_size;
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Pointer to the device table, which is shared by all AMD IOMMUs.
+ ++++++++++++++++++ * It is indexed by the PCI device id or the HT unit id and contains
+ ++++++++++++++++++ * information about the domain the device belongs to as well as the
+ ++++++++++++++++++ * page table root pointer.
+ ++++++++++++++++++ */
          +         struct dev_table_entry *amd_iommu_dev_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The alias table is a driver specific data structure which contains the
+ ++++++++++++++++++ * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
+ ++++++++++++++++++ * More than one device can share the same requestor id.
+ ++++++++++++++++++ */
          +         u16 *amd_iommu_alias_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The rlookup table is used to find the IOMMU which is responsible
+ ++++++++++++++++++ * for a specific device. It is also indexed by the PCI device id.
+ ++++++++++++++++++ */
          +         struct amd_iommu **amd_iommu_rlookup_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * The pd table (protection domain table) is used to find the protection domain
+ ++++++++++++++++++ * data structure a device belongs to. Indexed with the PCI device id too.
+ ++++++++++++++++++ */
          +         struct protection_domain **amd_iommu_pd_table;
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
+ ++++++++++++++++++ * to know which ones are already in use.
+ ++++++++++++++++++ */
          +         unsigned long *amd_iommu_pd_alloc_bitmap;
          +         
- -------- ---------    UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
+ ++++++++++++++++++static u32 dev_table_size;  /* size of the device table */
+ ++++++++++++++++++static u32 alias_table_size;        /* size of the alias table */
+ ++++++++++++++++++static u32 rlookup_table_size;      /* size of the rlookup table */
          +         
+ ++++++++++++++++++static inline void update_last_devid(u16 devid)
+ ++++++++++++++++++{
+ ++++++++++++++++++    if (devid > amd_iommu_last_bdf)
+ ++++++++++++++++++            amd_iommu_last_bdf = devid;
+ ++++++++++++++++++}
+ ++++++++++++++++++
+ ++++++++++++++++++static inline unsigned long tbl_size(int entry_size)
+ ++++++++++++++++++{
+ ++++++++++++++++++    unsigned shift = PAGE_SHIFT +
+ ++++++++++++++++++                     get_order(amd_iommu_last_bdf * entry_size);
+ ++++++++++++++++++
+ ++++++++++++++++++    return 1UL << shift;
+ ++++++++++++++++++}
+ ++++++++++++++++++
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * AMD IOMMU MMIO register space handling functions
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * These functions are used to program the IOMMU device registers in
+ ++++++++++++++++++ * MMIO space required for that driver.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function sets the exclusion range in the IOMMU. DMA accesses to the
+ ++++++++++++++++++ * exclusion range are passed through untranslated.
+ ++++++++++++++++++ */
          +         static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
          +         {
          +             u64 start = iommu->exclusion_start & PAGE_MASK;
          +             u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
          +             u64 entry;
          +         
          +             if (!iommu->exclusion_start)
          +                     return;
          +         
          +             entry = start | MMIO_EXCL_ENABLE_MASK;
          +             memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
          +                             &entry, sizeof(entry));
          +         
          +             entry = limit;
          +             memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
          +                             &entry, sizeof(entry));
          +         }
          +         
+ ++++++++++++++++++/* Programs the physical address of the device table into the IOMMU hardware */
          +         static void __init iommu_set_device_table(struct amd_iommu *iommu)
          +         {
          +             u32 entry;
          +         
          +             BUG_ON(iommu->mmio_base == NULL);
          +         
          +             entry = virt_to_phys(amd_iommu_dev_table);
          +             entry |= (dev_table_size >> 12) - 1;
          +             memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
          +                             &entry, sizeof(entry));
          +         }
          +         
+ ++++++++++++++++++/* Generic functions to enable/disable certain features of the IOMMU. */
          +         static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
          +         {
          +             u32 ctrl;
          +         
          +             ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +             ctrl |= (1 << bit);
          +             writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +         }
          +         
          +         static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
          +         {
          +             u32 ctrl;
          +         
          +             ctrl = (u64)readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +             ctrl &= ~(1 << bit);
          +             writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
          +         }
          +         
+ ++++++++++++++++++/* Function to enable the hardware */
          +         void __init iommu_enable(struct amd_iommu *iommu)
          +         {
          +             printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at ");
          +             print_devid(iommu->devid, 0);
          +             printk(" cap 0x%hx\n", iommu->cap_ptr);
          +         
          +             iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
+ ++++++++++++++++++ * the system has one.
+ ++++++++++++++++++ */
          +         static u8 * __init iommu_map_mmio_space(u64 address)
          +         {
          +             u8 *ret;
          +         
          +             if (!request_mem_region(address, MMIO_REGION_LENGTH, "amd_iommu"))
          +                     return NULL;
          +         
          +             ret = ioremap_nocache(address, MMIO_REGION_LENGTH);
          +             if (ret != NULL)
          +                     return ret;
          +         
          +             release_mem_region(address, MMIO_REGION_LENGTH);
          +         
          +             return NULL;
          +         }
          +         
          +         static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
          +         {
          +             if (iommu->mmio_base)
          +                     iounmap(iommu->mmio_base);
          +             release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH);
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The functions below belong to the first pass of AMD IOMMU ACPI table
+ ++++++++++++++++++ * parsing. In this pass we try to find out the highest device id this
+ ++++++++++++++++++ * code has to handle. Based on this information the size of the shared data
+ ++++++++++++++++++ * structures is determined later.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function reads the last device id the IOMMU has to handle from the PCI
+ ++++++++++++++++++ * capability header for this IOMMU
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr)
          +         {
          +             u32 cap;
          +         
          +             cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
- -------- ---------                    UPDATE_LAST_BDF(dev->devid);
+ ++++++++++++++++++    update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap)));
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * After reading the highest device id from the IOMMU PCI capability header,
+ ++++++++++++++++++ * this function checks whether a higher device id is defined in the ACPI table
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
          +         {
          +             u8 *p = (void *)h, *end = (void *)h;
          +             struct ivhd_entry *dev;
          +         
          +             p += sizeof(*h);
          +             end += h->length;
          +         
          +             find_last_devid_on_pci(PCI_BUS(h->devid),
          +                             PCI_SLOT(h->devid),
          +                             PCI_FUNC(h->devid),
          +                             h->cap_ptr);
          +         
          +             while (p < end) {
          +                     dev = (struct ivhd_entry *)p;
          +                     switch (dev->type) {
          +                     case IVHD_DEV_SELECT:
          +                     case IVHD_DEV_RANGE_END:
          +                     case IVHD_DEV_ALIAS:
          +                     case IVHD_DEV_EXT_SELECT:
- -------- ---------    u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++                    /* all the above subfield types refer to device ids */
+ ++++++++++++++++++                    update_last_devid(dev->devid);
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += 0x04 << (*p >> 6);
          +             }
          +         
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Iterate over all IVHD entries in the ACPI table and find the highest device
+ ++++++++++++++++++ * id which we need to handle. This is the first of three functions which parse
+ ++++++++++++++++++ * the ACPI table. So we check the checksum here.
+ ++++++++++++++++++ */
          +         static int __init find_last_devid_acpi(struct acpi_table_header *table)
          +         {
          +             int i;
          +             u8 checksum = 0, *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivhd_header *h;
          +         
          +             /*
          +              * Validate checksum here so we don't need to do it when
          +              * we actually parse the table
          +              */
          +             for (i = 0; i < table->length; ++i)
          +                     checksum += p[i];
          +             if (checksum != 0)
          +                     /* ACPI table corrupt */
          +                     return -ENODEV;
          +         
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             end += table->length;
          +             while (p < end) {
          +                     h = (struct ivhd_header *)p;
          +                     switch (h->type) {
          +                     case ACPI_IVHD_TYPE:
          +                             find_last_devid_from_ivhd(h);
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += h->length;
          +             }
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The following functions belong to the code path which parses the ACPI table
+ ++++++++++++++++++ * the second time. In this ACPI parsing iteration we allocate IOMMU specific
+ ++++++++++++++++++ * data structures, initialize the device/alias/rlookup table and also
+ ++++++++++++++++++ * basically initialize the hardware.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Allocates the command buffer. This buffer is per AMD IOMMU. We can
+ ++++++++++++++++++ * write commands to that buffer later and the IOMMU will execute them
+ ++++++++++++++++++ * asynchronously
+ ++++++++++++++++++ */
          +         static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
          +         {
- -------- ---------    u64 entry = 0;
+ ++++++++++++++++++    u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                             get_order(CMD_BUFFER_SIZE));
- -------- ---------    memset(cmd_buf, 0, CMD_BUFFER_SIZE);
- -------- ---------
+ ++++++++++++++++++    u64 entry;
          +         
          +             if (cmd_buf == NULL)
          +                     return NULL;
          +         
          +             iommu->cmd_buf_size = CMD_BUFFER_SIZE;
          +         
- -------- ---------    if (iommu->cmd_buf)
- -------- ---------            free_pages((unsigned long)iommu->cmd_buf,
- -------- ---------                            get_order(CMD_BUFFER_SIZE));
          +             entry = (u64)virt_to_phys(cmd_buf);
          +             entry |= MMIO_CMD_SIZE_512;
          +             memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
          +                             &entry, sizeof(entry));
          +         
          +             iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
          +         
          +             return cmd_buf;
          +         }
          +         
          +         static void __init free_command_buffer(struct amd_iommu *iommu)
          +         {
- -------- ---------static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags)
+ ++++++++++++++++++    free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
          +         }
          +         
+ ++++++++++++++++++/* sets a specific bit in the device table entry. */
          +         static void set_dev_entry_bit(u16 devid, u8 bit)
          +         {
          +             int i = (bit >> 5) & 0x07;
          +             int _bit = bit & 0x1f;
          +         
          +             amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
          +         }
          +         
- -------- ---------}
+ ++++++++++++++++++/* Writes the specific IOMMU for a device into the rlookup table */
+ ++++++++++++++++++static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
+ ++++++++++++++++++{
+ ++++++++++++++++++    amd_iommu_rlookup_table[devid] = iommu;
+ ++++++++++++++++++}
+ ++++++++++++++++++
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function takes the device specific flags read from the ACPI
+ ++++++++++++++++++ * table and sets up the device table entry with that information
+ ++++++++++++++++++ */
+ ++++++++++++++++++static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
+ ++++++++++++++++++                                       u16 devid, u32 flags, u32 ext_flags)
          +         {
          +             if (flags & ACPI_DEVFLAG_INITPASS)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
          +             if (flags & ACPI_DEVFLAG_EXTINT)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
          +             if (flags & ACPI_DEVFLAG_NMI)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
          +             if (flags & ACPI_DEVFLAG_SYSMGT1)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
          +             if (flags & ACPI_DEVFLAG_SYSMGT2)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
          +             if (flags & ACPI_DEVFLAG_LINT0)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
          +             if (flags & ACPI_DEVFLAG_LINT1)
          +                     set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
- -------- ---------static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
- -------- ---------{
- -------- ---------    amd_iommu_rlookup_table[devid] = iommu;
          +         
- -------- ---------    iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range));
- -------- ---------    iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range));
+ ++++++++++++++++++    set_iommu_for_device(iommu, devid);
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Reads the device exclusion range from ACPI and initializes the IOMMU with
+ ++++++++++++++++++ * it.
+ ++++++++++++++++++ */
          +         static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
          +         {
          +             struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
          +         
          +             if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
          +                     return;
          +         
          +             if (iommu) {
+ ++++++++++++++++++            /*
+ ++++++++++++++++++             * We can only configure exclusion ranges per IOMMU, not
+ ++++++++++++++++++             * per device. But we can enable the exclusion range per
+ ++++++++++++++++++             * device. This is done here.
+ ++++++++++++++++++             */
          +                     set_dev_entry_bit(m->devid, DEV_ENTRY_EX);
          +                     iommu->exclusion_start = m->range_start;
          +                     iommu->exclusion_length = m->range_length;
          +             }
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function reads some important data from the IOMMU PCI space and
+ ++++++++++++++++++ * initializes the driver data structure with it. It reads the hardware
+ ++++++++++++++++++ * capabilities and the first/last device entries
+ ++++++++++++++++++ */
          +         static void __init init_iommu_from_pci(struct amd_iommu *iommu)
          +         {
          +             int bus = PCI_BUS(iommu->devid);
          +             int dev = PCI_SLOT(iommu->devid);
          +             int fn  = PCI_FUNC(iommu->devid);
          +             int cap_ptr = iommu->cap_ptr;
          +             u32 range;
          +         
          +             iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET);
          +         
          +             range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET);
- -------- ---------    bool alias = 0;
+ ++++++++++++++++++    iommu->first_device = calc_devid(MMIO_GET_BUS(range),
+ ++++++++++++++++++                                     MMIO_GET_FD(range));
+ ++++++++++++++++++    iommu->last_device = calc_devid(MMIO_GET_BUS(range),
+ ++++++++++++++++++                                    MMIO_GET_LD(range));
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Takes a pointer to an AMD IOMMU entry in the ACPI table and
+ ++++++++++++++++++ * initializes the hardware and our data structures with it.
+ ++++++++++++++++++ */
          +         static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
          +                                             struct ivhd_header *h)
          +         {
          +             u8 *p = (u8 *)h;
          +             u8 *end = p, flags = 0;
          +             u16 dev_i, devid = 0, devid_start = 0, devid_to = 0;
          +             u32 ext_flags = 0;
- -------- ---------                            set_dev_entry_from_acpi(dev_i, e->flags, 0);
+ ++++++++++++++++++    bool alias = false;
          +             struct ivhd_entry *e;
          +         
          +             /*
          +              * First set the recommended feature enable bits from ACPI
          +              * into the IOMMU control registers
          +              */
          +             h->flags & IVHD_FLAG_HT_TUN_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
          +         
          +             h->flags & IVHD_FLAG_PASSPW_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
          +         
          +             h->flags & IVHD_FLAG_RESPASSPW_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
          +         
          +             h->flags & IVHD_FLAG_ISOC_EN ?
          +                     iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
          +                     iommu_feature_disable(iommu, CONTROL_ISOC_EN);
          +         
          +             /*
          +              * make IOMMU memory accesses cache coherent
          +              */
          +             iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
          +         
          +             /*
          +              * Done. Now parse the device entries
          +              */
          +             p += sizeof(struct ivhd_header);
          +             end += h->length;
          +         
          +             while (p < end) {
          +                     e = (struct ivhd_entry *)p;
          +                     switch (e->type) {
          +                     case IVHD_DEV_ALL:
          +                             for (dev_i = iommu->first_device;
          +                                             dev_i <= iommu->last_device; ++dev_i)
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, 0);
+ ++++++++++++++++++                            set_dev_entry_from_acpi(iommu, dev_i,
+ ++++++++++++++++++                                                    e->flags, 0);
          +                             break;
          +                     case IVHD_DEV_SELECT:
          +                             devid = e->devid;
- -------- ---------                    alias = 0;
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
          +                             break;
          +                     case IVHD_DEV_SELECT_RANGE_START:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             ext_flags = 0;
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, 0);
+ ++++++++++++++++++                    alias = false;
          +                             break;
          +                     case IVHD_DEV_ALIAS:
          +                             devid = e->devid;
          +                             devid_to = e->ext >> 8;
- -------- ---------                    alias = 1;
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
          +                             amd_iommu_alias_table[devid] = devid_to;
          +                             break;
          +                     case IVHD_DEV_ALIAS_RANGE:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             devid_to = e->ext >> 8;
          +                             ext_flags = 0;
- -------- ---------                    set_dev_entry_from_acpi(devid, e->flags, e->ext);
+ ++++++++++++++++++                    alias = true;
          +                             break;
          +                     case IVHD_DEV_EXT_SELECT:
          +                             devid = e->devid;
- -------- ---------                    alias = 0;
+ ++++++++++++++++++                    set_dev_entry_from_acpi(iommu, devid, e->flags,
+ ++++++++++++++++++                                            e->ext);
          +                             break;
          +                     case IVHD_DEV_EXT_SELECT_RANGE:
          +                             devid_start = e->devid;
          +                             flags = e->flags;
          +                             ext_flags = e->ext;
- -------- ---------                            set_dev_entry_from_acpi(
+ ++++++++++++++++++                    alias = false;
          +                             break;
          +                     case IVHD_DEV_RANGE_END:
          +                             devid = e->devid;
          +                             for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
          +                                     if (alias)
          +                                             amd_iommu_alias_table[dev_i] = devid_to;
- -------- ---------    INIT_LIST_HEAD(&amd_iommu_list);
- -------- ---------
+ ++++++++++++++++++                            set_dev_entry_from_acpi(iommu,
          +                                                     amd_iommu_alias_table[dev_i],
          +                                                     flags, ext_flags);
          +                             }
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +         
          +                     p += 0x04 << (e->type >> 6);
          +             }
          +         }
          +         
+ ++++++++++++++++++/* Initializes the device->iommu mapping for the driver */
          +         static int __init init_iommu_devices(struct amd_iommu *iommu)
          +         {
          +             u16 i;
          +         
          +             for (i = iommu->first_device; i <= iommu->last_device; ++i)
          +                     set_iommu_for_device(iommu, i);
          +         
          +             return 0;
          +         }
          +         
          +         static void __init free_iommu_one(struct amd_iommu *iommu)
          +         {
          +             free_command_buffer(iommu);
          +             iommu_unmap_mmio_space(iommu);
          +         }
          +         
          +         static void __init free_iommu_all(void)
          +         {
          +             struct amd_iommu *iommu, *next;
          +         
          +             list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
          +                     list_del(&iommu->list);
          +                     free_iommu_one(iommu);
          +                     kfree(iommu);
          +             }
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function glues the initialization functions for one IOMMU
+ ++++++++++++++++++ * together and also allocates the command buffer and programs the
+ ++++++++++++++++++ * hardware. It does NOT enable the IOMMU. This is done afterwards.
+ ++++++++++++++++++ */
          +         static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
          +         {
          +             spin_lock_init(&iommu->lock);
          +             list_add_tail(&iommu->list, &amd_iommu_list);
          +         
          +             /*
          +              * Copy data from ACPI table entry to the iommu struct
          +              */
          +             iommu->devid = h->devid;
          +             iommu->cap_ptr = h->cap_ptr;
          +             iommu->mmio_phys = h->mmio_phys;
          +             iommu->mmio_base = iommu_map_mmio_space(h->mmio_phys);
          +             if (!iommu->mmio_base)
          +                     return -ENOMEM;
          +         
          +             iommu_set_device_table(iommu);
          +             iommu->cmd_buf = alloc_command_buffer(iommu);
          +             if (!iommu->cmd_buf)
          +                     return -ENOMEM;
          +         
          +             init_iommu_from_pci(iommu);
          +             init_iommu_from_acpi(iommu, h);
          +             init_iommu_devices(iommu);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * Iterates over all IOMMU entries in the ACPI table, allocates the
+ ++++++++++++++++++ * IOMMU structure and initializes it with init_iommu_one()
+ ++++++++++++++++++ */
          +         static int __init init_iommu_all(struct acpi_table_header *table)
          +         {
          +             u8 *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivhd_header *h;
          +             struct amd_iommu *iommu;
          +             int ret;
          +         
- -------- ---------    INIT_LIST_HEAD(&amd_iommu_unity_map);
- -------- ---------
          +             end += table->length;
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             while (p < end) {
          +                     h = (struct ivhd_header *)p;
          +                     switch (*p) {
          +                     case ACPI_IVHD_TYPE:
          +                             iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
          +                             if (iommu == NULL)
          +                                     return -ENOMEM;
          +                             ret = init_iommu_one(iommu, h);
          +                             if (ret)
          +                                     return ret;
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     p += h->length;
          +         
          +             }
          +             WARN_ON(p != end);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * The next functions belong to the third pass of parsing the ACPI
+ ++++++++++++++++++ * table. In this last pass the memory mapping requirements are
+ ++++++++++++++++++ * gathered (like exclusion and unity mapping ranges).
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static void __init free_unity_maps(void)
          +         {
          +             struct unity_map_entry *entry, *next;
          +         
          +             list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
          +                     list_del(&entry->list);
          +                     kfree(entry);
          +             }
          +         }
          +         
+ ++++++++++++++++++/* called when we find an exclusion range definition in ACPI */
          +         static int __init init_exclusion_range(struct ivmd_header *m)
          +         {
          +             int i;
          +         
          +             switch (m->type) {
          +             case ACPI_IVMD_TYPE:
          +                     set_device_exclusion_range(m->devid, m);
          +                     break;
          +             case ACPI_IVMD_TYPE_ALL:
          +                     for (i = 0; i < amd_iommu_last_bdf; ++i)
          +                             set_device_exclusion_range(i, m);
          +                     break;
          +             case ACPI_IVMD_TYPE_RANGE:
          +                     for (i = m->devid; i <= m->aux; ++i)
          +                             set_device_exclusion_range(i, m);
          +                     break;
          +             default:
          +                     break;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/* called for unity map ACPI definition */
          +         static int __init init_unity_map_range(struct ivmd_header *m)
          +         {
          +             struct unity_map_entry *e = 0;
          +         
          +             e = kzalloc(sizeof(*e), GFP_KERNEL);
          +             if (e == NULL)
          +                     return -ENOMEM;
          +         
          +             switch (m->type) {
          +             default:
          +             case ACPI_IVMD_TYPE:
          +                     e->devid_start = e->devid_end = m->devid;
          +                     break;
          +             case ACPI_IVMD_TYPE_ALL:
          +                     e->devid_start = 0;
          +                     e->devid_end = amd_iommu_last_bdf;
          +                     break;
          +             case ACPI_IVMD_TYPE_RANGE:
          +                     e->devid_start = m->devid;
          +                     e->devid_end = m->aux;
          +                     break;
          +             }
          +             e->address_start = PAGE_ALIGN(m->range_start);
          +             e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
          +             e->prot = m->flags >> 1;
          +         
          +             list_add_tail(&e->list, &amd_iommu_unity_map);
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/* iterates over all memory definitions we find in the ACPI table */
          +         static int __init init_memory_definitions(struct acpi_table_header *table)
          +         {
          +             u8 *p = (u8 *)table, *end = (u8 *)table;
          +             struct ivmd_header *m;
          +         
- -------- ---------    dev_table_size     = TBL_SIZE(DEV_TABLE_ENTRY_SIZE);
- -------- ---------    alias_table_size   = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE);
- -------- ---------    rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE);
          +             end += table->length;
          +             p += IVRS_HEADER_LENGTH;
          +         
          +             while (p < end) {
          +                     m = (struct ivmd_header *)p;
          +                     if (m->flags & IVMD_FLAG_EXCL_RANGE)
          +                             init_exclusion_range(m);
          +                     else if (m->flags & IVMD_FLAG_UNITY_MAP)
          +                             init_unity_map_range(m);
          +         
          +                     p += m->length;
          +             }
          +         
          +             return 0;
          +         }
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This function finally enables all IOMMUs found in the system after
+ ++++++++++++++++++ * they have been initialized
+ ++++++++++++++++++ */
          +         static void __init enable_iommus(void)
          +         {
          +             struct amd_iommu *iommu;
          +         
          +             list_for_each_entry(iommu, &amd_iommu_list, list) {
          +                     iommu_set_exclusion_range(iommu);
          +                     iommu_enable(iommu);
          +             }
          +         }
          +         
          +         /*
          +          * Suspend/Resume support
          +          * disable suspend until real resume implemented
          +          */
          +         
          +         static int amd_iommu_resume(struct sys_device *dev)
          +         {
          +             return 0;
          +         }
          +         
          +         static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
          +         {
          +             return -EINVAL;
          +         }
          +         
          +         static struct sysdev_class amd_iommu_sysdev_class = {
          +             .name = "amd_iommu",
          +             .suspend = amd_iommu_suspend,
          +             .resume = amd_iommu_resume,
          +         };
          +         
          +         static struct sys_device device_amd_iommu = {
          +             .id = 0,
          +             .cls = &amd_iommu_sysdev_class,
          +         };
          +         
+ ++++++++++++++++++/*
+ ++++++++++++++++++ * This is the core init function for AMD IOMMU hardware in the system.
+ ++++++++++++++++++ * This function is called from the generic x86 DMA layer initialization
+ ++++++++++++++++++ * code.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * This function basically parses the ACPI table for AMD IOMMU (IVRS)
+ ++++++++++++++++++ * three times:
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  1 pass) Find the highest PCI device id the driver has to handle.
+ ++++++++++++++++++ *          Upon this information the size of the data structures is
+ ++++++++++++++++++ *          determined that needs to be allocated.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  2 pass) Initialize the data structures just allocated with the
+ ++++++++++++++++++ *          information in the ACPI table about available AMD IOMMUs
+ ++++++++++++++++++ *          in the system. It also maps the PCI devices in the
+ ++++++++++++++++++ *          system to specific IOMMUs
+ ++++++++++++++++++ *
+ ++++++++++++++++++ *  3 pass) After the basic data structures are allocated and
+ ++++++++++++++++++ *          initialized we update them with information about memory
+ ++++++++++++++++++ *          remapping requirements parsed out of the ACPI table in
+ ++++++++++++++++++ *          this last pass.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * After that the hardware is initialized and ready to go. In the last
+ ++++++++++++++++++ * step we do some Linux specific things like registering the driver in
+ ++++++++++++++++++ * the dma_ops interface and initializing the suspend/resume support
+ ++++++++++++++++++ * functions. Finally it prints some information about AMD IOMMUs and
+ ++++++++++++++++++ * the driver state and enables the hardware.
+ ++++++++++++++++++ */
          +         int __init amd_iommu_init(void)
          +         {
          +             int i, ret = 0;
          +         
          +         
          +             if (no_iommu) {
          +                     printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n");
          +                     return 0;
          +             }
          +         
          +             if (!amd_iommu_detected)
          +                     return -ENODEV;
          +         
          +             /*
          +              * First parse ACPI tables to find the largest Bus/Dev/Func
          +              * we need to handle. Upon this information the shared data
          +              * structures for the IOMMUs in the system will be allocated
          +              */
          +             if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0)
          +                     return -ENODEV;
          +         
- -------- ---------    amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
+ ++++++++++++++++++    alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
+ ++++++++++++++++++    rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
          +         
          +             ret = -ENOMEM;
          +         
          +             /* Device table - directly used by all IOMMUs */
- -------- ---------    amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                                           get_order(dev_table_size));
          +             if (amd_iommu_dev_table == NULL)
          +                     goto out;
          +         
          +             /*
          +              * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
          +              * IOMMU sees for that device
          +              */
          +             amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
          +                             get_order(alias_table_size));
          +             if (amd_iommu_alias_table == NULL)
          +                     goto free;
          +         
          +             /* IOMMU rlookup table - find the IOMMU for a specific device */
          +             amd_iommu_rlookup_table = (void *)__get_free_pages(GFP_KERNEL,
          +                             get_order(rlookup_table_size));
          +             if (amd_iommu_rlookup_table == NULL)
          +                     goto free;
          +         
          +             /*
          +              * Protection Domain table - maps devices to protection domains
          +              * This table has the same size as the rlookup_table
          +              */
- -------- ---------    amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL,
+ ++++++++++++++++++    amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
          +                                          get_order(rlookup_table_size));
          +             if (amd_iommu_pd_table == NULL)
          +                     goto free;
          +         
- -------- ---------     * memory is allocated now; initialize the device table with all zeroes
- -------- ---------     * and let all alias entries point to itself
+ ++++++++++++++++++    amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
+ ++++++++++++++++++                                        GFP_KERNEL | __GFP_ZERO,
          +                                                 get_order(MAX_DOMAIN_ID/8));
          +             if (amd_iommu_pd_alloc_bitmap == NULL)
          +                     goto free;
          +         
          +             /*
- -------- ---------    memset(amd_iommu_dev_table, 0, dev_table_size);
+ ++++++++++++++++++     * let all alias entries point to itself
          +              */
- -------- ---------    memset(amd_iommu_pd_table, 0, rlookup_table_size);
- -------- ---------    memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8);
- -------- ---------
          +             for (i = 0; i < amd_iommu_last_bdf; ++i)
          +                     amd_iommu_alias_table[i] = i;
          +         
- -------- ---------    if (amd_iommu_pd_alloc_bitmap)
- -------- ---------            free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
          +             /*
          +              * never allocate domain 0 because it's used as the non-allocated and
          +              * error value placeholder
          +              */
          +             amd_iommu_pd_alloc_bitmap[0] = 1;
          +         
          +             /*
          +              * now the data structures are allocated and basically initialized
          +              * start the real acpi table scan
          +              */
          +             ret = -ENODEV;
          +             if (acpi_table_parse("IVRS", init_iommu_all) != 0)
          +                     goto free;
          +         
          +             if (acpi_table_parse("IVRS", init_memory_definitions) != 0)
          +                     goto free;
          +         
          +             ret = amd_iommu_init_dma_ops();
          +             if (ret)
          +                     goto free;
          +         
          +             ret = sysdev_class_register(&amd_iommu_sysdev_class);
          +             if (ret)
          +                     goto free;
          +         
          +             ret = sysdev_register(&device_amd_iommu);
          +             if (ret)
          +                     goto free;
          +         
          +             enable_iommus();
          +         
          +             printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
          +                             (1 << (amd_iommu_aperture_order-20)));
          +         
          +             printk(KERN_INFO "AMD IOMMU: device isolation ");
          +             if (amd_iommu_isolate)
          +                     printk("enabled\n");
          +             else
          +                     printk("disabled\n");
          +         
          +         out:
          +             return ret;
          +         
          +         free:
- -------- ---------    if (amd_iommu_pd_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_pd_table,
- -------- ---------                            get_order(rlookup_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1);
          +         
- -------- ---------    if (amd_iommu_rlookup_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_rlookup_table,
- -------- ---------                            get_order(rlookup_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_pd_table,
+ ++++++++++++++++++               get_order(rlookup_table_size));
          +         
- -------- ---------    if (amd_iommu_alias_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_alias_table,
- -------- ---------                            get_order(alias_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_rlookup_table,
+ ++++++++++++++++++               get_order(rlookup_table_size));
          +         
- -------- ---------    if (amd_iommu_dev_table)
- -------- ---------            free_pages((unsigned long)amd_iommu_dev_table,
- -------- ---------                            get_order(dev_table_size));
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_alias_table,
+ ++++++++++++++++++               get_order(alias_table_size));
          +         
- -------- ---------    if (swiotlb || no_iommu || iommu_detected)
+ ++++++++++++++++++    free_pages((unsigned long)amd_iommu_dev_table,
+ ++++++++++++++++++               get_order(dev_table_size));
          +         
          +             free_iommu_all();
          +         
          +             free_unity_maps();
          +         
          +             goto out;
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Early detect code. This code runs at IOMMU detection time in the DMA
+ ++++++++++++++++++ * layer. It just looks if there is an IVRS ACPI table to detect AMD
+ ++++++++++++++++++ * IOMMUs
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
          +         static int __init early_amd_iommu_detect(struct acpi_table_header *table)
          +         {
          +             return 0;
          +         }
          +         
          +         void __init amd_iommu_detect(void)
          +         {
- -------- ---------    for (; *str; ++str) {
- -------- ---------            if (strcmp(str, "32M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 25;
- -------- ---------            if (strcmp(str, "64M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 26;
- -------- ---------            if (strcmp(str, "128M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 27;
- -------- ---------            if (strcmp(str, "256M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 28;
- -------- ---------            if (strcmp(str, "512M") == 0)
- -------- ---------                    amd_iommu_aperture_order = 29;
- -------- ---------            if (strcmp(str, "1G") == 0)
- -------- ---------                    amd_iommu_aperture_order = 30;
- -------- ---------    }
+ ++++++++++++++++++    if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture))
          +                     return;
          +         
          +             if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) {
          +                     iommu_detected = 1;
          +                     amd_iommu_detected = 1;
          +         #ifdef CONFIG_GART_IOMMU
          +                     gart_iommu_aperture_disabled = 1;
          +                     gart_iommu_aperture = 0;
          +         #endif
          +             }
          +         }
          +         
+ ++++++++++++++++++/****************************************************************************
+ ++++++++++++++++++ *
+ ++++++++++++++++++ * Parsing functions for the AMD IOMMU specific kernel command line
+ ++++++++++++++++++ * options.
+ ++++++++++++++++++ *
+ ++++++++++++++++++ ****************************************************************************/
+ ++++++++++++++++++
          +         static int __init parse_amd_iommu_options(char *str)
          +         {
          +             for (; *str; ++str) {
          +                     if (strcmp(str, "isolate") == 0)
          +                             amd_iommu_isolate = 1;
          +             }
          +         
          +             return 1;
          +         }
          +         
          +         static int __init parse_amd_iommu_size_options(char *str)
          +         {
+ ++++++++++++++++++    unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
+ ++++++++++++++++++
+ ++++++++++++++++++    if ((order > 24) && (order < 31))
+ ++++++++++++++++++            amd_iommu_aperture_order = order;
          +         
          +             return 1;
          +         }
          +         
          +         __setup("amd_iommu=", parse_amd_iommu_options);
          +         __setup("amd_iommu_size=", parse_amd_iommu_size_options);
@@@@@@@@@@@@@@@@@@@@@ -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -74,7 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 -75,17 +75,17 @@@@@@@@@@@@@@@@@@@@@ char system_vectors[NR_VECTORS] = { [0 
                    /*
                     * Debug level, exported for io_apic.c
                     */
-- -----------------int apic_verbosity;
++ +++++++++++++++++unsigned int apic_verbosity;
          +         
          +         int pic_mode;
          +         
          +         /* Have we found an MP table */
          +         int smp_found_config;
          +         
          +         static struct resource lapic_resource = {
          +             .name = "Local APIC",
          +             .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
          +         };
                    
                    static unsigned int calibration_result;
                    
@@@@@@@@@@@@@@@@@@@@@ -543,22 -543,22 -514,55 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -532,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 -543,22 +514,55 @@@@@@@@@@@@@@@@@@@@@ static int __init calibrate_APIC_clock(
                        if (!local_apic_timer_verify_ok) {
                                printk(KERN_WARNING
                                       "APIC timer disabled due to verification failure.\n");
++ +++++++++++++++++                    return -1;
++ +++++++++++++++++    }
++ +++++++++++++++++
++ +++++++++++++++++    return 0;
++ +++++++++++++++++}
++ +++++++++++++++++
++ +++++++++++++++++/*
++ +++++++++++++++++ * Setup the boot APIC
++ +++++++++++++++++ *
++ +++++++++++++++++ * Calibrate and verify the result.
++ +++++++++++++++++ */
++ +++++++++++++++++void __init setup_boot_APIC_clock(void)
++ +++++++++++++++++{
++ +++++++++++++++++    /*
++ +++++++++++++++++     * The local apic timer can be disabled via the kernel
++ +++++++++++++++++     * commandline or from the CPU detection code. Register the lapic
++ +++++++++++++++++     * timer as a dummy clock event source on SMP systems, so the
++ +++++++++++++++++     * broadcast mechanism is used. On UP systems simply ignore it.
++ +++++++++++++++++     */
++ +++++++++++++++++    if (local_apic_timer_disabled) {
                                /* No broadcast on UP ! */
-- -----------------            if (num_possible_cpus() == 1)
-- -----------------                    return;
-- -----------------    } else {
-- -----------------            /*
-- -----------------             * If nmi_watchdog is set to IO_APIC, we need the
-- -----------------             * PIT/HPET going.  Otherwise register lapic as a dummy
-- -----------------             * device.
-- -----------------             */
-- -----------------            if (nmi_watchdog != NMI_IO_APIC)
-- -----------------                    lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
-- -----------------            else
-- -----------------                    printk(KERN_WARNING "APIC timer registered as dummy,"
-- ------- ---------                            " due to nmi_watchdog=%d!\n", nmi_watchdog);
          -                                    " due to nmi_watchdog=1!\n");
++ +++++++++++++++++            if (num_possible_cpus() > 1) {
++ +++++++++++++++++                    lapic_clockevent.mult = 1;
++ +++++++++++++++++                    setup_APIC_timer();
++ +++++++++++++++++            }
++ +++++++++++++++++            return;
      ++  + +   +       }
      ++  + +   +   
++ +++++++++++++++++    apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
++ +++++++++++++++++                "calibrating APIC timer ...\n");
++ +++++++++++++++++
++ +++++++++++++++++    if (calibrate_APIC_clock()) {
++ +++++++++++++++++            /* No broadcast on UP ! */
++ +++++++++++++++++            if (num_possible_cpus() > 1)
++ +++++++++++++++++                    setup_APIC_timer();
++ +++++++++++++++++            return;
++ +++  ++ + +++ +++    }
++ +++  ++ + +++ +++
++ +++++++++++++++++    /*
++ +++++++++++++++++     * If nmi_watchdog is set to IO_APIC, we need the
++ +++++++++++++++++     * PIT/HPET going.  Otherwise register lapic as a dummy
++ +++++++++++++++++     * device.
++ +++++++++++++++++     */
++ +++++++++++++++++    if (nmi_watchdog != NMI_IO_APIC)
++ +++++++++++++++++            lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
++ +++++++++++++++++    else
++ +++++++++++++++++            printk(KERN_WARNING "APIC timer registered as dummy,"
++ +++++++++++++++++                    " due to nmi_watchdog=%d!\n", nmi_watchdog);
++ +++++++++++++++++
                        /* Setup the lapic or request the broadcast */
                        setup_APIC_timer();
                    }
                    
                    int __init APIC_init_uniprocessor(void)
                    {
---- ----- ---------    if (disable_apic)
          -             if (enable_local_apic < 0)
---- ---------------            clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
---- ---------------
                        if (!smp_found_config && !cpu_has_apic)
                                return -1;
                    
                         * The reschedule interrupt is a CPU-to-CPU reschedule-helper
                         * IPI, driven by wakeup.
                         */
          -             set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
          +             alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
                    
                        /* IPI for invalidation */
          -             set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
          +             alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
                    
                        /* IPI for generic function call */
          -             set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
          +             alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
      ++  + +   +   
      ++  + +   +       /* IPI for single call function */
      ++  + +   +       set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
      ++  + +   +                               call_function_single_interrupt);
                    }
                    #endif
                    
@@@@@@@@@@@@@@@@@@@@@ -1699,8 -1699,8 -1703,8 -1699,8 -1696,8 -1699,8 -1695,8 -1695,8 -1699,8 -1699,8 -1710,8 -1699,8 -1695,8 -1699,8 -1699,8 -1699,8 -1695,8 -1699,8 -1699,8 -1699,8 +1700,8 @@@@@@@@@@@@@@@@@@@@@ early_param("lapic", parse_lapic)
                    
                    static int __init parse_nolapic(char *arg)
                    {
          -             enable_local_apic = -1;
          -             clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
          +             disable_apic = 1;
---- ----- ---------    clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
++++ +++++++++++++++    setup_clear_cpu_cap(X86_FEATURE_APIC);
                        return 0;
                    }
                    early_param("nolapic", parse_nolapic);
@@@@@@@@@@@@@@@@@@@@@ -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,7 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 -54,10 +54,10 @@@@@@@@@@@@@@@@@@@@@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_o
                    /*
                     * Debug level, exported for io_apic.c
                     */
-- -----------------int apic_verbosity;
++ +++++++++++++++++unsigned int apic_verbosity;
          +         
          +         /* Have we found an MP table */
          +         int smp_found_config;
                    
                    static struct resource lapic_resource = {
                        .name = "Local APIC",
index 7b8cc72,7b8cc72,7b8cc72,2a4475b,daee611,7b8cc72,7b8cc72,7518502,7b8cc72,7b8cc72,0000000,7b8cc72,7b8cc72,7b8cc72,7b8cc72,7b8cc72,36537ab,7b8cc72,7b8cc72,736f50f..dd6e3f1
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
--- ------ ---------#include <linux/string.h>
          +         #include <linux/init.h>
          +         #include <linux/kernel.h>
          +         #include <linux/sched.h>
          +         #include <linux/string.h>
          +         #include <linux/bootmem.h>
          +         #include <linux/bitops.h>
          +         #include <linux/module.h>
          +         #include <linux/kgdb.h>
          +         #include <linux/topology.h>
--- ------ ---------#include <linux/module.h>
          +         #include <linux/delay.h>
          +         #include <linux/smp.h>
--- ------ ---------#include <asm/processor.h>
          +         #include <linux/percpu.h>
       -        -       unsigned int n, dummy, eax, ebx, ecx, edx;
          +         #include <asm/i387.h>
          +         #include <asm/msr.h>
          +         #include <asm/io.h>
+++++++++++++++++++ #include <asm/linkage.h>
          +         #include <asm/mmu_context.h>
          +         #include <asm/mtrr.h>
          +         #include <asm/mce.h>
          +         #include <asm/pat.h>
          +         #include <asm/numa.h>
          +         #ifdef CONFIG_X86_LOCAL_APIC
          +         #include <asm/mpspec.h>
          +         #include <asm/apic.h>
          +         #include <mach_apic.h>
          +         #endif
          +         #include <asm/pda.h>
          +         #include <asm/pgtable.h>
          +         #include <asm/processor.h>
          +         #include <asm/desc.h>
          +         #include <asm/atomic.h>
          +         #include <asm/proto.h>
          +         #include <asm/sections.h>
          +         #include <asm/setup.h>
          +         #include <asm/genapic.h>
          +         
          +         #include "cpu.h"
          +         
          +         /* We need valid kernel segments for data and code in long mode too
          +          * IRET will check the segment types  kkeil 2000/10/28
          +          * Also sysret mandates a special GDT layout
          +          */
          +         /* The TLS descriptors are currently at a different place compared to i386.
          +            Hopefully nobody expects them at a fixed place (Wine?) */
          +         DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
          +             [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
          +             [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
          +             [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
          +             [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
          +             [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
          +             [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
          +         } };
          +         EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
          +         
          +         __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
          +         
          +         /* Current gdt points %fs at the "master" per-cpu area: after this,
          +          * it's on the real one. */
          +         void switch_to_new_gdt(void)
          +         {
          +             struct desc_ptr gdt_descr;
          +         
          +             gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
          +             gdt_descr.size = GDT_SIZE - 1;
          +             load_gdt(&gdt_descr);
          +         }
          +         
          +         struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
          +         
          +         /*
          +          * c_init hook of the fallback "Unknown" cpu_dev: the only work done
          +          * for such a CPU is reporting its cache information.
          +          */
          +         static void __cpuinit default_init(struct cpuinfo_x86 *c)
          +         {
          +             display_cacheinfo(c);
          +         }
          +         
          +         static struct cpu_dev __cpuinitdata default_cpu = {
          +             .c_init = default_init,
          +             .c_vendor = "Unknown",
          +         };
          +         static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
          +         
          +         int __cpuinit get_model_name(struct cpuinfo_x86 *c)
          +         {
          +             unsigned int *v;
          +         
          +             if (c->extended_cpuid_level < 0x80000004)
          +                     return 0;
          +         
          +             v = (unsigned int *) c->x86_model_id;
          +             cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
          +             cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
          +             cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
          +             c->x86_model_id[48] = 0;
          +             return 1;
          +         }
          +         
          +         
          +         void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
          +         {
       -        -       if (n >= 0x80000008) {
       -        -               cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
       -        -               c->x86_virt_bits = (eax >> 8) & 0xff;
       -        -               c->x86_phys_bits = eax & 0xff;
       -        -       }
       +  +     +       unsigned int n, dummy, ebx, ecx, edx;
          +         
          +             n = c->extended_cpuid_level;
          +         
          +             if (n >= 0x80000005) {
          +                     cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
          +                     printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
          +                            "D cache %dK (%d bytes/line)\n",
          +                            edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
          +                     c->x86_cache_size = (ecx>>24) + (edx>>24);
          +                     /* On K8 L1 TLB is inclusive, so don't count it */
          +                     c->x86_tlbsize = 0;
          +             }
          +         
          +             if (n >= 0x80000006) {
          +                     cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
          +                     ecx = cpuid_ecx(0x80000006);
          +                     c->x86_cache_size = ecx >> 16;
          +                     c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
          +         
          +                     printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
          +                     c->x86_cache_size, ecx & 0xFF);
          +             }
---- ----- ---------    c->extended_cpuid_level = cpuid_eax(0x80000000);
          +         }
          +         
          +         void __cpuinit detect_ht(struct cpuinfo_x86 *c)
          +         {
          +         #ifdef CONFIG_SMP
          +             u32 eax, ebx, ecx, edx;
          +             int index_msb, core_bits;
          +         
          +             cpuid(1, &eax, &ebx, &ecx, &edx);
          +         
          +         
          +             if (!cpu_has(c, X86_FEATURE_HT))
          +                     return;
          +             if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
          +                     goto out;
          +         
          +             smp_num_siblings = (ebx & 0xff0000) >> 16;
          +         
          +             if (smp_num_siblings == 1) {
          +                     printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
          +             } else if (smp_num_siblings > 1) {
          +         
          +                     if (smp_num_siblings > NR_CPUS) {
          +                             printk(KERN_WARNING "CPU: Unsupported number of "
          +                                    "siblings %d", smp_num_siblings);
          +                             smp_num_siblings = 1;
          +                             return;
          +                     }
          +         
          +                     index_msb = get_count_order(smp_num_siblings);
          +                     c->phys_proc_id = phys_pkg_id(index_msb);
          +         
          +                     smp_num_siblings = smp_num_siblings / c->x86_max_cores;
          +         
          +                     index_msb = get_count_order(smp_num_siblings);
          +         
          +                     core_bits = get_count_order(c->x86_max_cores);
          +         
          +                     c->cpu_core_id = phys_pkg_id(index_msb) &
          +                                                    ((1 << core_bits) - 1);
          +             }
          +         out:
          +             if ((c->x86_max_cores * smp_num_siblings) > 1) {
          +                     printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
          +                            c->phys_proc_id);
          +                     printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
          +                            c->cpu_core_id);
          +             }
          +         
          +         #endif
          +         }
          +         
          +         static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
          +         {
          +             char *v = c->x86_vendor_id;
          +             int i;
          +             static int printed;
          +         
          +             for (i = 0; i < X86_VENDOR_NUM; i++) {
          +                     if (cpu_devs[i]) {
          +                             if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
          +                                 (cpu_devs[i]->c_ident[1] &&
          +                                 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
          +                                     c->x86_vendor = i;
          +                                     this_cpu = cpu_devs[i];
          +                                     return;
          +                             }
          +                     }
          +             }
          +             if (!printed) {
          +                     printed++;
          +                     printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
          +                     printk(KERN_ERR "CPU: Your system may be unstable.\n");
          +             }
          +             c->x86_vendor = X86_VENDOR_UNKNOWN;
          +         }
          +         
          +         static void __init early_cpu_support_print(void)
          +         {
          +             int i,j;
          +             struct cpu_dev *cpu_devx;
          +         
          +             printk("KERNEL supported cpus:\n");
          +             for (i = 0; i < X86_VENDOR_NUM; i++) {
          +                     cpu_devx = cpu_devs[i];
          +                     if (!cpu_devx)
          +                             continue;
          +                     for (j = 0; j < 2; j++) {
          +                             if (!cpu_devx->c_ident[j])
          +                                     continue;
          +                             printk("  %s %s\n", cpu_devx->c_vendor,
          +                                     cpu_devx->c_ident[j]);
          +                     }
          +             }
          +         }
          +         
          +         static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
          +         
          +         void __init early_cpu_init(void)
          +         {
          +                 struct cpu_vendor_dev *cvdev;
          +         
          +                 for (cvdev = __x86cpuvendor_start ;
          +                      cvdev < __x86cpuvendor_end   ;
          +                      cvdev++)
          +                         cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
          +             early_cpu_support_print();
          +             early_identify_cpu(&boot_cpu_data);
          +         }
          +         
          +         /* Do some early cpuid on the boot CPU to get the parameters that are
          +            needed before check_bugs. Everything advanced is in identify_cpu
          +            below. */
          +         static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
          +         {
          +             u32 tfms, xlvl;
          +         
          +             /* Reset every field that is re-derived from CPUID below. */
          +             c->loops_per_jiffy = loops_per_jiffy;
          +             c->x86_cache_size = -1;
          +             c->x86_vendor = X86_VENDOR_UNKNOWN;
          +             c->x86_model = c->x86_mask = 0; /* So far unknown... */
          +             c->x86_vendor_id[0] = '\0'; /* Unset */
          +             c->x86_model_id[0] = '\0';  /* Unset */
          +             c->x86_clflush_size = 64;
          +             c->x86_cache_alignment = c->x86_clflush_size;
          +             c->x86_max_cores = 1;
          +             c->x86_coreid_bits = 0;
          +             c->extended_cpuid_level = 0;
          +             memset(&c->x86_capability, 0, sizeof c->x86_capability);
          +         
          +             /*
          +              * Get vendor name.  Leaf 0 also yields cpuid_level; the ebx,
          +              * ecx and edx outputs land at offsets 0, 8 and 4 of
          +              * x86_vendor_id respectively.
          +              */
          +             cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
          +                   (unsigned int *)&c->x86_vendor_id[0],
          +                   (unsigned int *)&c->x86_vendor_id[8],
          +                   (unsigned int *)&c->x86_vendor_id[4]);
          +         
          +             get_cpu_vendor(c);
          +         
          +             /* Initialize the standard set of capabilities */
          +             /* Note that the vendor-specific code below might override */
          +         
          +             /* Intel-defined flags: level 0x00000001 */
          +             if (c->cpuid_level >= 0x00000001) {
          +                     __u32 misc;
          +                     cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
          +                           &c->x86_capability[0]);
          +                     /*
          +                      * tfms: family in bits 11:8, model in 7:4, stepping in
          +                      * 3:0.  Bits 27:20 are added to the family when it is
          +                      * 0xf; bits 19:16 become the high model nibble for
          +                      * family >= 6.
          +                      */
          +                     c->x86 = (tfms >> 8) & 0xf;
          +                     c->x86_model = (tfms >> 4) & 0xf;
          +                     c->x86_mask = tfms & 0xf;
          +                     if (c->x86 == 0xf)
          +                             c->x86 += (tfms >> 20) & 0xff;
          +                     if (c->x86 >= 0x6)
          +                             c->x86_model += ((tfms >> 16) & 0xF) << 4;
          +                     /* CLFLUSH line size: bits 15:8 of ebx, in 8-byte units. */
          +                     if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
          +                             c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
          +             } else {
          +                     /* Have CPUID level 0 only - unheard of */
          +                     c->x86 = 4;
          +             }
          +         
          +             /* Initial APIC ID: bits 31:24 of ebx from leaf 1. */
          +             c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
          +         #ifdef CONFIG_SMP
          +             c->phys_proc_id = c->initial_apicid;
          +         #endif
          +             /* AMD-defined flags: level 0x80000001 */
          +             xlvl = cpuid_eax(0x80000000);
          +             c->extended_cpuid_level = xlvl;
          +             /* Only trust xlvl when it lies in the 0x8000xxxx range. */
          +             if ((xlvl & 0xffff0000) == 0x80000000) {
          +                     if (xlvl >= 0x80000001) {
          +                             c->x86_capability[1] = cpuid_edx(0x80000001);
          +                             c->x86_capability[6] = cpuid_ecx(0x80000001);
          +                     }
          +                     if (xlvl >= 0x80000004)
          +                             get_model_name(c); /* Default name */
          +             }
          +         
          +             /* Transmeta-defined flags: level 0x80860001 */
          +             xlvl = cpuid_eax(0x80860000);
          +             if ((xlvl & 0xffff0000) == 0x80860000) {
          +                     /* Don't set x86_cpuid_level here for now to not confuse. */
          +                     if (xlvl >= 0x80860001)
          +                             c->x86_capability[2] = cpuid_edx(0x80860001);
          +             }
          +         
                -       /* Assume all 64-bit CPUs support 32-bit syscall */
                -       set_cpu_cap(c, X86_FEATURE_SYSCALL32);
          +             if (c->extended_cpuid_level >= 0x80000007)
          +                     c->x86_power = cpuid_edx(0x80000007);
          +         
------- -- ----- --     /* Assume all 64-bit CPUs support 32-bit syscall */
------- -- ----- --     set_cpu_cap(c, X86_FEATURE_SYSCALL32);
------- -- ----- -- 
       +  +     +       if (c->extended_cpuid_level >= 0x80000008) {
       +  +     +               u32 eax = cpuid_eax(0x80000008);
       +  +     +   
       +  +     +               c->x86_virt_bits = (eax >> 8) & 0xff;
       +  +     +               c->x86_phys_bits = eax & 0xff;
       +  +     +       }
       +  +         
---- ----- ---------
---- ----- ---------    /* early_param could clear that, but recall get it set again */
---- ----- ---------    if (disable_apic)
---- ----- ---------            clear_cpu_cap(c, X86_FEATURE_APIC);
          +             /* Give the matched vendor driver, if any, its early hook. */
          +             if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
          +                 cpu_devs[c->x86_vendor]->c_early_init)
          +                     cpu_devs[c->x86_vendor]->c_early_init(c);
          +         
          +             validate_pat_support(c);
---------- --------                        DEBUG_STKSZ]
---------- -------- __attribute__((section(".bss.page_aligned")));
          +         }
          +         
          +         /*
          +          * This does the hard work of actually picking apart the CPU stuff...
          +          */
          +         static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
          +         {
          +             int i;
          +         
          +             /* Start from freshly probed CPUID data (this resets @c's fields). */
          +             early_identify_cpu(c);
          +         
          +             init_scattered_cpuid_features(c);
          +         
          +             c->apicid = phys_pkg_id(0);
          +         
          +             /*
          +              * Vendor-specific initialization.  In this section we
          +              * canonicalize the feature flags, meaning if there are
          +              * features a certain CPU supports which CPUID doesn't
          +              * tell us, CPUID claiming incorrect flags, or other bugs,
          +              * we handle them here.
          +              *
          +              * At the end of this section, c->x86_capability better
          +              * indicate the features this CPU genuinely supports!
          +              */
          +             if (this_cpu->c_init)
          +                     this_cpu->c_init(c);
          +         
          +             detect_ht(c);
          +         
          +             /*
          +              * On SMP, boot_cpu_data holds the common feature set between
          +              * all CPUs; so make sure that we indicate which features are
          +              * common between the CPUs.  The first time this routine gets
          +              * executed, c == &boot_cpu_data.
          +              */
          +             if (c != &boot_cpu_data) {
          +                     /* AND the already accumulated flags with these */
          +                     for (i = 0; i < NCAPINTS; i++)
          +                             boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
          +             }
          +         
          +             /* Clear all flags overridden by options */
          +             for (i = 0; i < NCAPINTS; i++)
          +                     c->x86_capability[i] &= ~cleared_cpu_caps[i];
          +         
          +         #ifdef CONFIG_X86_MCE
          +             mcheck_init(c);
          +         #endif
          +             select_idle_routine(c);
          +         
          +         #ifdef CONFIG_NUMA
          +             numa_add_cpu(smp_processor_id());
          +         #endif
          +         
          +         }
          +         
          +         /* Run the full identification pass on the boot CPU (boot_cpu_data). */
          +         void __cpuinit identify_boot_cpu(void)
          +         {
          +             identify_cpu(&boot_cpu_data);
          +         }
          +         
          +         /*
          +          * Run the full identification pass on a non-boot CPU described by @c,
          +          * then call mtrr_ap_init().  Passing &boot_cpu_data is a bug.
          +          */
          +         void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
          +         {
          +             BUG_ON(c == &boot_cpu_data);
          +             identify_cpu(c);
          +             mtrr_ap_init();
          +         }
          +         
          +         /*
          +          * "noclflush" boot option: request that X86_FEATURE_CLFLSH be cleared.
          +          * Always returns 1; @arg is unused.
          +          */
          +         static __init int setup_noclflush(char *arg)
          +         {
          +             setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
          +             return 1;
          +         }
          +         __setup("noclflush", setup_noclflush);
          +         
          +         void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
          +         {
          +             if (c->x86_model_id[0])
          +                     printk(KERN_CONT "%s", c->x86_model_id);
          +         
          +             if (c->x86_mask || c->cpuid_level >= 0)
          +                     printk(KERN_CONT " stepping %02x\n", c->x86_mask);
          +             else
          +                     printk(KERN_CONT "\n");
          +         }
          +         
          +         static __init int setup_disablecpuid(char *arg)
          +         {
          +             int bit;
          +             if (get_option(&arg, &bit) && bit < NCAPINTS*32)
          +                     setup_clear_cpu_cap(bit);
          +             else
          +                     return 0;
          +             return 1;
          +         }
          +         __setup("clearcpuid=", setup_disablecpuid);
          +         
          +         cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
          +         
          +         struct x8664_pda **_cpu_pda __read_mostly;
          +         EXPORT_SYMBOL(_cpu_pda);
          +         
          +         struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
          +         
          +         char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
          +         
          +         unsigned long __supported_pte_mask __read_mostly = ~0UL;
          +         EXPORT_SYMBOL_GPL(__supported_pte_mask);
          +         
          +         static int do_not_nx __cpuinitdata;
          +         
          +         /* noexec=on|off
          +         Control non executable mappings for 64bit processes.
          +         
          +         on  Enable(default)
          +         off Disable
          +         */
          +         static int __init nonx_setup(char *str)
          +         {
          +             if (!str)
          +                     return -EINVAL;
          +             if (!strncmp(str, "on", 2)) {
          +                     __supported_pte_mask |= _PAGE_NX;
          +                     do_not_nx = 0;
          +             } else if (!strncmp(str, "off", 3)) {
          +                     do_not_nx = 1;
          +                     __supported_pte_mask &= ~_PAGE_NX;
          +             }
          +             return 0;
          +         }
          +         early_param("noexec", nonx_setup);
          +         
          +         int force_personality32;
          +         
          +         /* noexec32=on|off
          +         Control non executable heap for 32bit processes.
          +         To control the stack too use noexec=off
          +         
          +         on  PROT_READ does not imply PROT_EXEC for 32bit processes (default)
          +         off PROT_READ implies PROT_EXEC
          +         */
          +         static int __init nonx32_setup(char *str)
          +         {
          +             if (!strcmp(str, "on"))
          +                     force_personality32 &= ~READ_IMPLIES_EXEC;
          +             else if (!strcmp(str, "off"))
          +                     force_personality32 |= READ_IMPLIES_EXEC;
          +             return 1;
          +         }
          +         __setup("noexec32=", nonx32_setup);
          +         
          +         /*
          +          * Initialise the per-CPU data area (PDA) of @cpu: point MSR_GS_BASE at
          +          * it, fill in its bookkeeping fields and set up its IRQ stack pointer.
          +          * CPU 0 uses the static boot_cpu_stack; other CPUs get freshly
          +          * allocated pages (panics if that allocation fails).
          +          */
          +         void pda_init(int cpu)
          +         {
          +             struct x8664_pda *pda = cpu_pda(cpu);
          +         
          +             /* Set up data that may be needed in __get_free_pages early */
          +             loadsegment(fs, 0);
          +             loadsegment(gs, 0);
          +             /* Memory clobbers used to order PDA accesses */
          +             mb();
          +             wrmsrl(MSR_GS_BASE, pda);
          +             mb();
          +         
          +             pda->cpunumber = cpu;
          +             pda->irqcount = -1;
          +             pda->kernelstack = (unsigned long)stack_thread_info() -
          +                                      PDA_STACKOFFSET + THREAD_SIZE;
          +             pda->active_mm = &init_mm;
          +             pda->mmu_state = 0;
          +         
          +             if (cpu == 0) {
          +                     /* others are initialized in smpboot.c */
          +                     pda->pcurrent = &init_task;
          +                     pda->irqstackptr = boot_cpu_stack;
          +             } else {
          +                     pda->irqstackptr = (char *)
          +                             __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
          +                     if (!pda->irqstackptr)
          +                             panic("cannot allocate irqstack for cpu %d", cpu);
          +         
          +                     /* Only fill in the node when it is still 0 and a node is known. */
          +                     if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
          +                             pda->nodenumber = cpu_to_node(cpu);
          +             }
          +         
          +             /*
          +              * Move from the base of the stack area to 64 bytes below its
          +              * end (presumably the initial stack top -- confirm against the
          +              * IRQ entry code).
          +              */
          +             pda->irqstackptr += IRQSTACKSIZE-64;
          +         }
          +         
          +         char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
+++++++++++++++++++                        DEBUG_STKSZ] __page_aligned_bss;
          +         
          +         extern asmlinkage void ignore_sysret(void);
          +         
          +         /* May not be marked __init: used by software suspend */
          +         void syscall_init(void)
          +         {
          +             /*
          +              * LSTAR and STAR live in a bit strange symbiosis.
          +              * They both write to the same internal register. STAR allows to
          +              * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
          +              */
          +             wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
          +             wrmsrl(MSR_LSTAR, system_call);
          +             /*
          +              * CSTAR gets ignore_sysret here; NOTE(review): presumably
          +              * syscall32_cpu_init() replaces it when IA32 emulation is
          +              * configured -- confirm.
          +              */
          +             wrmsrl(MSR_CSTAR, ignore_sysret);
          +         
          +         #ifdef CONFIG_IA32_EMULATION
          +             syscall32_cpu_init();
          +         #endif
          +         
          +             /* Flags to clear on syscall */
          +             wrmsrl(MSR_SYSCALL_MASK,
          +                    X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
          +         }
          +         
          +         void __cpuinit check_efer(void)
          +         {
          +             unsigned long efer;
          +         
          +             rdmsrl(MSR_EFER, efer);
          +             if (!(efer & EFER_NX) || do_not_nx)
          +                     __supported_pte_mask &= ~_PAGE_NX;
          +         }
          +         
          +         unsigned long kernel_eflags;
          +         
          +         /*
          +          * Copies of the original ist values from the tss are only accessed during
          +          * debugging, no special alignment required.
          +          */
          +         DEFINE_PER_CPU(struct orig_ist, orig_ist);
          +         
          +         /*
          +          * cpu_init() initializes state that is per-CPU. Some data is already
          +          * initialized (naturally) in the bootstrap process, such as the GDT
          +          * and IDT. We reload them nevertheless, this function acts as a
          +          * 'CPU state barrier', nothing should get across.
          +          * A lot of state is already set up in PDA init.
          +          */
          +         void __cpuinit cpu_init(void)
          +         {
          +             int cpu = stack_smp_processor_id();
          +             struct tss_struct *t = &per_cpu(init_tss, cpu);
          +             struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
          +             unsigned long v;
          +             char *estacks = NULL;
          +             struct task_struct *me;
          +             int i;
          +         
          +             /* CPU 0 is initialised in head64.c */
          +             if (cpu != 0)
          +                     pda_init(cpu);
          +             else
          +                     estacks = boot_exception_stacks;
          +         
          +             me = current;
          +         
          +             /* Each CPU may pass through here only once. */
          +             if (cpu_test_and_set(cpu, cpu_initialized))
          +                     panic("CPU#%d already initialized!\n", cpu);
          +         
          +             printk(KERN_INFO "Initializing CPU#%d\n", cpu);
          +         
          +             clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
          +         
          +             /*
          +              * Initialize the per-CPU GDT with the boot GDT,
          +              * and set up the GDT descriptor:
          +              */
          +         
          +             switch_to_new_gdt();
          +             load_idt((const struct desc_ptr *)&idt_descr);
          +         
          +             memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
          +             syscall_init();
          +         
          +             wrmsrl(MSR_FS_BASE, 0);
          +             wrmsrl(MSR_KERNEL_GS_BASE, 0);
          +             barrier();
          +         
          +             check_efer();
          +         
          +             /*
          +              * set up and load the per-CPU TSS
          +              *
          +              * CPU 0 carves its exception stacks out of boot_exception_stacks
          +              * (estacks keeps advancing through it); every other CPU
          +              * allocates each stack separately.  The IST entry records the
          +              * address just past the end of each stack.
          +              */
          +             for (v = 0; v < N_EXCEPTION_STACKS; v++) {
          +                     static const unsigned int order[N_EXCEPTION_STACKS] = {
          +                             [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
          +                             [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
          +                     };
          +                     if (cpu) {
          +                             estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
          +                             /* NOTE(review): v is unsigned long but printed with %ld. */
          +                             if (!estacks)
          +                                     panic("Cannot allocate exception stack %ld %d\n",
          +                                           v, cpu);
          +                     }
          +                     estacks += PAGE_SIZE << order[v];
          +                     orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
          +             }
          +         
          +             t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
          +             /*
          +              * <= is required because the CPU will access up to
          +              * 8 bits beyond the end of the IO permission bitmap.
          +              */
          +             for (i = 0; i <= IO_BITMAP_LONGS; i++)
          +                     t->io_bitmap[i] = ~0UL;
          +         
          +             /* Run on init_mm; the current task must not own an mm here. */
          +             atomic_inc(&init_mm.mm_count);
          +             me->active_mm = &init_mm;
          +             if (me->mm)
          +                     BUG();
          +             enter_lazy_tlb(&init_mm, me);
          +         
          +             load_sp0(t, &current->thread);
          +             set_tss_desc(cpu, t);
          +             load_TR_desc();
          +             load_LDT(&init_mm.context);
          +         
          +         #ifdef CONFIG_KGDB
          +             /*
          +              * If the kgdb is connected no debug regs should be altered.  This
          +              * is only applicable when KGDB and a KGDB I/O module are built
          +              * into the kernel and you are using early debugging with
          +              * kgdbwait. KGDB will control the kernel HW breakpoint registers.
          +              */
          +             if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
          +                     arch_kgdb_ops.correct_hw_break();
          +             else {
          +         #endif
          +             /*
          +              * Clear all 6 debug registers:
          +              */
          +         
          +             set_debugreg(0UL, 0);
          +             set_debugreg(0UL, 1);
          +             set_debugreg(0UL, 2);
          +             set_debugreg(0UL, 3);
          +             set_debugreg(0UL, 6);
          +             set_debugreg(0UL, 7);
          +         #ifdef CONFIG_KGDB
          +             /* If the kgdb is connected no debug regs should be altered. */
          +             }
          +         #endif
          +         
          +             fpu_init();
          +         
          +             /* Snapshot the flags register into kernel_eflags. */
          +             raw_local_save_flags(kernel_eflags);
          +         
          +             if (is_uv_system())
          +                     uv_cpu_init();
          +         }
Simple merge
                    #include <asm/percpu.h>
                    #include <asm/dwarf2.h>
                    #include <asm/processor-flags.h>
          -         #include "irq_vectors.h"
       +  +     +   #include <asm/ftrace.h>
          +         #include <asm/irq_vectors.h>
                    
                    /*
                     * We use macros for low-level operations which need to be overridden
                    ENTRY(xen_sysenter_target)
                        RING0_INT_FRAME
                        addl $5*4, %esp         /* remove xen-provided frame */
++++++++++++++++++ +    CFI_ADJUST_CFA_OFFSET -5*4
                        jmp sysenter_past_esp
       +  +             CFI_ENDPROC
                    
                    ENTRY(xen_hypervisor_callback)
                        CFI_STARTPROC
@@@@@@@@@@@@@@@@@@@@@ -349,8 -349,8 -349,8 -349,8 -349,8 -349,8 -349,8 -243,8 -349,8 -349,8 -244,7 -349,8 -349,8 -349,8 -349,8 -349,7 -243,8 -349,8 -349,8 -349,8 +349,7 @@@@@@@@@@@@@@@@@@@@@ ENTRY(system_call_after_swapgs
                        movq  %rcx,RIP-ARGOFFSET(%rsp)
                        CFI_REL_OFFSET rip,RIP-ARGOFFSET
                        GET_THREAD_INFO(%rcx)
---------- ---- ----    testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
---------- ---- ----            TI_flags(%rcx)
          -             testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
+++++++++++++++ ++++    testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
                        jnz tracesys
                        cmpq $__NR_syscall_max,%rax
                        ja badsys
index ec024b3,ec024b3,384b49f,ec024b3,ec024b3,ec024b3,716b892,8dfe9db,ec024b3,ec024b3,0000000,e0b44b7,716b892,ec024b3,ec024b3,ec024b3,716b892,ec024b3,ec024b3,ec024b3..ac6d512
mode 100644,100644,100644,100644,100644,100644,100644,100644,100644,100644,000000,100644,100644,100644,100644,100644,100644,100644,100644,100644..100644
--- /dev/null
      --    -   -               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0);
          +         /*
          +          *  NMI watchdog support on APIC systems
          +          *
          +          *  Started by Ingo Molnar <mingo@redhat.com>
          +          *
          +          *  Fixes:
          +          *  Mikael Pettersson       : AMD K7 support for local APIC NMI watchdog.
          +          *  Mikael Pettersson       : Power Management for local APIC NMI watchdog.
          +          *  Mikael Pettersson       : Pentium 4 support for local APIC NMI watchdog.
          +          *  Pavel Machek and
          +          *  Mikael Pettersson       : PM converted to driver model. Disable/enable API.
          +          */
          +         
          +         #include <asm/apic.h>
          +         
          +         #include <linux/nmi.h>
          +         #include <linux/mm.h>
          +         #include <linux/delay.h>
          +         #include <linux/interrupt.h>
          +         #include <linux/module.h>
          +         #include <linux/sysdev.h>
          +         #include <linux/sysctl.h>
          +         #include <linux/percpu.h>
          +         #include <linux/kprobes.h>
          +         #include <linux/cpumask.h>
          +         #include <linux/kernel_stat.h>
          +         #include <linux/kdebug.h>
          +         #include <linux/smp.h>
          +         
          +         #include <asm/i8259.h>
          +         #include <asm/io_apic.h>
          +         #include <asm/smp.h>
          +         #include <asm/nmi.h>
          +         #include <asm/proto.h>
          +         #include <asm/timer.h>
          +         
          +         #include <asm/mce.h>
          +         
          +         #include <mach_traps.h>
          +         
          +         int unknown_nmi_panic;
          +         int nmi_watchdog_enabled;
          +         
          +         static cpumask_t backtrace_mask = CPU_MASK_NONE;
          +         
          +         /* nmi_active:
          +          * >0: the lapic NMI watchdog is active, but can be disabled
          +          * <0: the lapic NMI watchdog has not been set up, and cannot
          +          *     be enabled
          +          *  0: the lapic NMI watchdog is disabled, but can be enabled
          +          */
          +         atomic_t nmi_active = ATOMIC_INIT(0);               /* oprofile uses this */
          +         EXPORT_SYMBOL(nmi_active);
          +         
          +         unsigned int nmi_watchdog = NMI_NONE;
          +         EXPORT_SYMBOL(nmi_watchdog);
          +         
          +         static int panic_on_timeout;
          +         
          +         static unsigned int nmi_hz = HZ;
          +         static DEFINE_PER_CPU(short, wd_enabled);
          +         static int endflag __initdata;
          +         
          +         static inline unsigned int get_nmi_count(int cpu)
          +         {
          +         #ifdef CONFIG_X86_64
          +             return cpu_pda(cpu)->__nmi_count;
          +         #else
          +             return nmi_count(cpu);
          +         #endif
          +         }
          +         
          +         static inline int mce_in_progress(void)
          +         {
          +         #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
          +             return atomic_read(&mce_entry) > 0;
          +         #endif
          +             return 0;
          +         }
          +         
          +         /*
          +          * Take the local apic timer and PIT/HPET into account. We don't
          +          * know which one is active, when we have highres/dyntick on
          +          */
          +         static inline unsigned int get_timer_irqs(int cpu)
          +         {
          +         #ifdef CONFIG_X86_64
          +             return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
          +         #else
          +             return per_cpu(irq_stat, cpu).apic_timer_irqs +
          +                     per_cpu(irq_stat, cpu).irq0_irqs;
          +         #endif
          +         }
          +         
          +         #ifdef CONFIG_SMP
          +         /*
          +          * The performance counters used by NMI_LOCAL_APIC don't trigger when
          +          * the CPU is idle. To make sure the NMI watchdog really ticks on all
          +          * CPUs during the test make them busy.
          +          */
          +         static __init void nmi_cpu_busy(void *data)
          +         {
          +             local_irq_enable_in_hardirq();
          +             /*
          +              * Intentionally don't use cpu_relax here. This is
          +              * to make sure that the performance counter really ticks,
          +              * even if there is a simulator or similar that catches the
          +              * pause instruction. On a real HT machine this is fine because
          +              * all other CPUs are busy with "useless" delay loops and don't
          +              * care if they get somewhat less cycles.
          +              */
          +             while (endflag == 0)
          +                     mb();
          +         }
          +         #endif
          +         
          +         int __init check_nmi_watchdog(void)
          +         {
          +             unsigned int *prev_nmi_count;
          +             int cpu;
          +         
          +             if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
          +                     return 0;
          +         
          +             prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
          +             if (!prev_nmi_count)
          +                     goto error;
          +         
          +             printk(KERN_INFO "Testing NMI watchdog ... ");
          +         
          +         #ifdef CONFIG_SMP
          +             if (nmi_watchdog == NMI_LOCAL_APIC)
-- ------- ---------    apic_write_around(APIC_LVT0, APIC_DM_NMI);
      ++  + +   +               smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
          +         #endif
          +         
          +             for_each_possible_cpu(cpu)
          +                     prev_nmi_count[cpu] = get_nmi_count(cpu);
          +             local_irq_enable();
          +             mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
          +         
          +             for_each_online_cpu(cpu) {
          +                     if (!per_cpu(wd_enabled, cpu))
          +                             continue;
          +                     if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
          +                             printk(KERN_WARNING "WARNING: CPU#%d: NMI "
          +                                     "appears to be stuck (%d->%d)!\n",
          +                                     cpu,
          +                                     prev_nmi_count[cpu],
          +                                     get_nmi_count(cpu));
          +                             per_cpu(wd_enabled, cpu) = 0;
          +                             atomic_dec(&nmi_active);
          +                     }
          +             }
          +             endflag = 1;
          +             if (!atomic_read(&nmi_active)) {
          +                     kfree(prev_nmi_count);
          +                     atomic_set(&nmi_active, -1);
          +                     goto error;
          +             }
          +             printk("OK.\n");
          +         
          +             /*
          +              * now that we know it works we can reduce NMI frequency to
          +              * something more reasonable; makes a difference in some configs
          +              */
          +             if (nmi_watchdog == NMI_LOCAL_APIC)
          +                     nmi_hz = lapic_adjust_nmi_hz(1);
          +         
          +             kfree(prev_nmi_count);
          +             return 0;
          +         error:
          +             if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
          +                     disable_8259A_irq(0);
       +  +         #ifdef CONFIG_X86_32
       +  +             timer_ack = 0;
       +  +         #endif
          +             return -1;
          +         }
          +         
          +         static int __init setup_nmi_watchdog(char *str)
          +         {
          +             unsigned int nmi;
          +         
          +             if (!strncmp(str, "panic", 5)) {
          +                     panic_on_timeout = 1;
          +                     str = strchr(str, ',');
          +                     if (!str)
          +                             return 1;
          +                     ++str;
          +             }
          +         
          +             get_option(&str, &nmi);
          +         
          +             if (nmi >= NMI_INVALID)
          +                     return 0;
          +         
          +             nmi_watchdog = nmi;
          +             return 1;
          +         }
          +         __setup("nmi_watchdog=", setup_nmi_watchdog);
          +         
          +         /*
          +          * Suspend/resume support
          +          */
          +         #ifdef CONFIG_PM
          +         
          +         static int nmi_pm_active; /* nmi_active before suspend */
          +         
          +         static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
          +         {
          +             /* only CPU0 goes here, other CPUs should be offline */
          +             nmi_pm_active = atomic_read(&nmi_active);
          +             stop_apic_nmi_watchdog(NULL);
          +             BUG_ON(atomic_read(&nmi_active) != 0);
          +             return 0;
          +         }
          +         
          +         static int lapic_nmi_resume(struct sys_device *dev)
          +         {
          +             /* only CPU0 goes here, other CPUs should be offline */
          +             if (nmi_pm_active > 0) {
          +                     setup_apic_nmi_watchdog(NULL);
          +                     touch_nmi_watchdog();
          +             }
          +             return 0;
          +         }
          +         
          +         static struct sysdev_class nmi_sysclass = {
          +             .name           = "lapic_nmi",
          +             .resume         = lapic_nmi_resume,
          +             .suspend        = lapic_nmi_suspend,
          +         };
          +         
          +         static struct sys_device device_lapic_nmi = {
          +             .id     = 0,
          +             .cls    = &nmi_sysclass,
          +         };
          +         
          +         static int __init init_lapic_nmi_sysfs(void)
          +         {
          +             int error;
          +         
          +             /*
          +              * should really be a BUG_ON but b/c this is an
          +              * init call, it just doesn't work.  -dcz
          +              */
          +             if (nmi_watchdog != NMI_LOCAL_APIC)
          +                     return 0;
          +         
          +             if (atomic_read(&nmi_active) < 0)
          +                     return 0;
          +         
          +             error = sysdev_class_register(&nmi_sysclass);
          +             if (!error)
          +                     error = sysdev_register(&device_lapic_nmi);
          +             return error;
          +         }
          +         
          +         /* must come after the local APIC's device_initcall() */
          +         late_initcall(init_lapic_nmi_sysfs);
          +         
          +         #endif      /* CONFIG_PM */
          +         
          +         static void __acpi_nmi_enable(void *__unused)
          +         {
      --    -   -               on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
++ +++++++++++++++++    apic_write(APIC_LVT0, APIC_DM_NMI);
          +         }
          +         
          +         /*
          +          * Enable timer based NMIs on all CPUs:
          +          */
          +         void acpi_nmi_enable(void)
          +         {
          +             if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
-- ------- ---------    apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
      ++  + +   +               on_each_cpu(__acpi_nmi_enable, NULL, 1);
          +         }
          +         
          +         static void __acpi_nmi_disable(void *__unused)
          +         {
      --    -   -               on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
++ +++++++++++++++++    apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
          +         }
          +         
          +         /*
          +          * Disable timer based NMIs on all CPUs:
          +          */
          +         void acpi_nmi_disable(void)
          +         {
          +             if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
      ++  + +   +               on_each_cpu(__acpi_nmi_disable, NULL, 1);
          +         }
          +         
          +         void setup_apic_nmi_watchdog(void *unused)
          +         {
          +             if (__get_cpu_var(wd_enabled))
          +                     return;
          +         
          +             /* cheap hack to support suspend/resume */
          +             /* if cpu0 is not active neither should the other cpus */
          +             if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
          +                     return;
          +         
          +             switch (nmi_watchdog) {
          +             case NMI_LOCAL_APIC:
          +                      /* enable it before to avoid race with handler */
          +                     __get_cpu_var(wd_enabled) = 1;
          +                     if (lapic_watchdog_init(nmi_hz) < 0) {
          +                             __get_cpu_var(wd_enabled) = 0;
          +                             return;
          +                     }
          +                     /* FALL THROUGH */
          +             case NMI_IO_APIC:
          +                     __get_cpu_var(wd_enabled) = 1;
          +                     atomic_inc(&nmi_active);
          +             }
          +         }
          +         
          +         void stop_apic_nmi_watchdog(void *unused)
          +         {
          +             /* only support LOCAL and IO APICs for now */
          +             if (!nmi_watchdog_active())
          +                     return;
          +             if (__get_cpu_var(wd_enabled) == 0)
          +                     return;
          +             if (nmi_watchdog == NMI_LOCAL_APIC)
          +                     lapic_watchdog_stop();
          +             __get_cpu_var(wd_enabled) = 0;
          +             atomic_dec(&nmi_active);
          +         }
          +         
          +         /*
          +          * the best way to detect whether a CPU has a 'hard lockup' problem
          +          * is to check it's local APIC timer IRQ counts. If they are not
          +          * changing then that CPU has some problem.
          +          *
          +          * as these watchdog NMI IRQs are generated on every CPU, we only
          +          * have to check the current processor.
          +          *
          +          * since NMIs don't listen to _any_ locks, we have to be extremely
          +          * careful not to rely on unsafe variables. The printk might lock
          +          * up though, so we have to break up any console locks first ...
          +          * [when there will be more tty-related locks, break them up here too!]
          +          */
          +         
          +         static DEFINE_PER_CPU(unsigned, last_irq_sum);
          +         static DEFINE_PER_CPU(local_t, alert_counter);
          +         static DEFINE_PER_CPU(int, nmi_touch);
          +         
          +         void touch_nmi_watchdog(void)
          +         {
          +             if (nmi_watchdog_active()) {
          +                     unsigned cpu;
          +         
          +                     /*
          +                      * Tell other CPUs to reset their alert counters. We cannot
          +                      * do it ourselves because the alert count increase is not
          +                      * atomic.
          +                      */
          +                     for_each_present_cpu(cpu) {
          +                             if (per_cpu(nmi_touch, cpu) != 1)
          +                                     per_cpu(nmi_touch, cpu) = 1;
          +                     }
          +             }
          +         
          +             /*
          +              * Tickle the softlockup detector too:
          +              */
          +             touch_softlockup_watchdog();
          +         }
          +         EXPORT_SYMBOL(touch_nmi_watchdog);
          +         
          +         notrace __kprobes int
          +         nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
          +         {
          +             /*
          +              * Since current_thread_info()-> is always on the stack, and we
          +              * always switch the stack NMI-atomically, it's safe to use
          +              * smp_processor_id().
          +              */
          +             unsigned int sum;
          +             int touched = 0;
          +             int cpu = smp_processor_id();
          +             int rc = 0;
          +         
          +             /* check for other users first */
          +             if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
          +                             == NOTIFY_STOP) {
          +                     rc = 1;
          +                     touched = 1;
          +             }
          +         
          +             sum = get_timer_irqs(cpu);
          +         
          +             if (__get_cpu_var(nmi_touch)) {
          +                     __get_cpu_var(nmi_touch) = 0;
          +                     touched = 1;
          +             }
          +         
          +             if (cpu_isset(cpu, backtrace_mask)) {
          +                     static DEFINE_SPINLOCK(lock);   /* Serialise the printks */
          +         
          +                     spin_lock(&lock);
          +                     printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
          +                     dump_stack();
          +                     spin_unlock(&lock);
          +                     cpu_clear(cpu, backtrace_mask);
          +             }
          +         
          +             /* Could check oops_in_progress here too, but it's safer not to */
          +             if (mce_in_progress())
          +                     touched = 1;
          +         
          +             /* if the none of the timers isn't firing, this cpu isn't doing much */
          +             if (!touched && __get_cpu_var(last_irq_sum) == sum) {
          +                     /*
          +                      * Ayiee, looks like this CPU is stuck ...
          +                      * wait a few IRQs (5 seconds) before doing the oops ...
          +                      */
          +                     local_inc(&__get_cpu_var(alert_counter));
          +                     if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz)
          +                             /*
          +                              * die_nmi will return ONLY if NOTIFY_STOP happens..
          +                              */
          +                             die_nmi("BUG: NMI Watchdog detected LOCKUP",
          +                                     regs, panic_on_timeout);
          +             } else {
          +                     __get_cpu_var(last_irq_sum) = sum;
          +                     local_set(&__get_cpu_var(alert_counter), 0);
          +             }
          +         
          +             /* see if the nmi watchdog went off */
          +             if (!__get_cpu_var(wd_enabled))
          +                     return rc;
          +             switch (nmi_watchdog) {
          +             case NMI_LOCAL_APIC:
          +                     rc |= lapic_wd_event(nmi_hz);
          +                     break;
          +             case NMI_IO_APIC:
          +                     /*
          +                      * don't know how to accurately check for this.
          +                      * just assume it was a watchdog timer interrupt
          +                      * This matches the old behaviour.
          +                      */
          +                     rc = 1;
          +                     break;
          +             }
          +             return rc;
          +         }
          +         
          +         #ifdef CONFIG_SYSCTL
          +         
+++++++++++ ++++++++static int __init setup_unknown_nmi_panic(char *str)
+++++++++++ ++++++++{
+++++++++++ ++++++++    unknown_nmi_panic = 1;
+++++++++++ ++++++++    return 1;
+++++++++++ ++++++++}
+++++++++++ ++++++++__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+++++++++++ ++++++++
          +         static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
          +         {
          +             unsigned char reason = get_nmi_reason();
          +             char buf[64];
          +         
          +             sprintf(buf, "NMI received for unknown reason %02x\n", reason);
          +             die_nmi(buf, regs, 1); /* Always panic here */
          +             return 0;
          +         }
          +         
          +         /*
          +          * proc handler for /proc/sys/kernel/nmi
          +          */
          +         int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
          +                             void __user *buffer, size_t *length, loff_t *ppos)
          +         {
          +             int old_state;
          +         
          +             nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
          +             old_state = nmi_watchdog_enabled;
          +             proc_dointvec(table, write, file, buffer, length, ppos);
          +             if (!!old_state == !!nmi_watchdog_enabled)
          +                     return 0;
          +         
          +             if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
          +                     printk(KERN_WARNING
          +                             "NMI watchdog is permanently disabled\n");
          +                     return -EIO;
          +             }
          +         
          +             if (nmi_watchdog == NMI_LOCAL_APIC) {
          +                     if (nmi_watchdog_enabled)
          +                             enable_lapic_nmi_watchdog();
          +                     else
          +                             disable_lapic_nmi_watchdog();
          +             } else {
          +                     printk(KERN_WARNING
          +                             "NMI watchdog doesn't know what hardware to touch\n");
          +                     return -EIO;
          +             }
          +             return 0;
          +         }
          +         
          +         #endif /* CONFIG_SYSCTL */
          +         
          +         int do_nmi_callback(struct pt_regs *regs, int cpu)
          +         {
          +         #ifdef CONFIG_SYSCTL
          +             if (unknown_nmi_panic)
          +                     return unknown_nmi_panic_callback(regs, cpu);
          +         #endif
          +             return 0;
          +         }
          +         
          +         void __trigger_all_cpu_backtrace(void)
          +         {
          +             int i;
          +         
          +             backtrace_mask = cpu_online_map;
          +             /* Wait for up to 10 seconds for all CPUs to do the backtrace */
          +             for (i = 0; i < 10 * 1000; i++) {
          +                     if (cpus_empty(backtrace_mask))
          +                             break;
          +                     mdelay(1);
          +             }
          +         }
                    #include <asm/desc.h>
                    #include <asm/setup.h>
                    #include <asm/arch_hooks.h>
+++++++++++++++++++ #include <asm/pgtable.h>
                    #include <asm/time.h>
          +         #include <asm/pgalloc.h>
                    #include <asm/irq.h>
                    #include <asm/delay.h>
                    #include <asm/fixmap.h>
                    
                    #include <asm/proto.h>
                    #include <asm/dma.h>
------- ------------#include <asm/gart.h>
+++++++ ++++++++++++#include <asm/iommu.h>
                    #include <asm/calgary.h>
          +         #include <asm/amd_iommu.h>
                    
--- ----------------int forbid_dac __read_mostly;
--- ----------------EXPORT_SYMBOL(forbid_dac);
+++ ++++++++++++++++static int forbid_dac __read_mostly;
                    
                    const struct dma_mapping_ops *dma_ops;
                    EXPORT_SYMBOL(dma_ops);
@@@@@@@@@@@@@@@@@@@@@ -123,12 -123,12 -123,12 -122,12 -123,12 -123,12 -123,12 -120,9 -123,12 -123,12 -121,10 -123,12 -123,12 -123,12 -123,12 -123,12 -123,12 -123,12 -123,12 -123,12 +119,9 @@@@@@@@@@@@@@@@@@@@@ void __init pci_iommu_alloc(void
                    
                        detect_intel_iommu();
                    
          -         #ifdef CONFIG_SWIOTLB
          +             amd_iommu_detect();
          +         
------- -- ---------#ifdef CONFIG_SWIOTLB
                        pci_swiotlb_init();
------- ------------#endif
                    }
                    #endif
                    
@@@@@@@@@@@@@@@@@@@@@ -505,12 -505,12 -505,12 -504,12 -505,12 -505,12 -505,12 -496,9 -505,12 -505,12 -501,10 -505,12 -505,12 -505,12 -505,12 -505,12 -505,12 -505,12 -505,12 -505,12 +495,9 @@@@@@@@@@@@@@@@@@@@@ static int __init pci_iommu_init(void
                    
                        intel_iommu_init();
                    
          -         #ifdef CONFIG_GART_IOMMU
          +             amd_iommu_init();
          +         
------- -- ---------#ifdef CONFIG_GART_IOMMU
                        gart_iommu_init();
------- ------------#endif
                    
                        no_iommu_init();
                        return 0;
Simple merge
                    #include <linux/sched.h>
                    #include <linux/module.h>
                    #include <linux/pm.h>
          +         #include <linux/clockchips.h>
      +++ + ++  +   #include <asm/system.h>
      +++ + ++  +   
      +++ + ++  +   unsigned long idle_halt;
      +++ + ++  +   EXPORT_SYMBOL(idle_halt);
      +++ + ++  +   unsigned long idle_nomwait;
      +++ + ++  +   EXPORT_SYMBOL(idle_nomwait);
                    
                    struct kmem_cache *task_xstate_cachep;
+++++ ++++++++++++++static int force_mwait __cpuinitdata;
                    
                    int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
                    {
                     *
                     * idle=mwait overrides this decision and forces the usage of mwait.
                     */
+++ ++++++++++++++++static int __cpuinitdata force_mwait;
          +         
          +         #define MWAIT_INFO                  0x05
          +         #define MWAIT_ECX_EXTENDED_INFO             0x01
          +         #define MWAIT_EDX_C1                        0xf0
          +         
                    static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
                    {
          +             u32 eax, ebx, ecx, edx;
          +         
                        if (force_mwait)
                                return 1;
                    
          -         #include <linux/kernel.h>
          +         /*
          +          *  Copyright (C) 1995  Linus Torvalds
          +          *
          +          *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
          +          *
          +          *  Memory region support
          +          *  David Parsons <orc@pell.chi.il.us>, July-August 1999
          +          *
          +          *  Added E820 sanitization routine (removes overlapping memory regions);
          +          *  Brian Moyle <bmoyle@mvista.com>, February 2001
          +          *
          +          * Moved CPU detection code to cpu/${cpu}.c
          +          *    Patrick Mochel <mochel@osdl.org>, March 2002
          +          *
          +          *  Provisions for empty E820 memory regions (reported by certain BIOSes).
          +          *  Alex Achenbach <xela@slit.de>, December 2002.
          +          *
          +          */
          +         
          +         /*
          +          * This file handles the architecture-dependent parts of initialization
          +          */
          +         
          +         #include <linux/sched.h>
          +         #include <linux/mm.h>
          +         #include <linux/mmzone.h>
          +         #include <linux/screen_info.h>
          +         #include <linux/ioport.h>
          +         #include <linux/acpi.h>
          +         #include <linux/apm_bios.h>
          +         #include <linux/initrd.h>
          +         #include <linux/bootmem.h>
          +         #include <linux/seq_file.h>
          +         #include <linux/console.h>
          +         #include <linux/mca.h>
          +         #include <linux/root_dev.h>
          +         #include <linux/highmem.h>
                    #include <linux/module.h>
          +         #include <linux/efi.h>
                    #include <linux/init.h>
          -         #include <linux/bootmem.h>
          +         #include <linux/edd.h>
          +         #include <linux/iscsi_ibft.h>
          +         #include <linux/nodemask.h>
          +         #include <linux/kexec.h>
          +         #include <linux/dmi.h>
          +         #include <linux/pfn.h>
          +         #include <linux/pci.h>
          +         #include <asm/pci-direct.h>
          +         #include <linux/init_ohci1394_dma.h>
          +         #include <linux/kvm_para.h>
          +         
          +         #include <linux/errno.h>
          +         #include <linux/kernel.h>
          +         #include <linux/stddef.h>
          +         #include <linux/unistd.h>
          +         #include <linux/ptrace.h>
          +         #include <linux/slab.h>
          +         #include <linux/user.h>
          +         #include <linux/delay.h>
--- ------ ---------#include <linux/highmem.h>
          +         
          +         #include <linux/kallsyms.h>
--- ------ ---------#include <linux/edd.h>
--- ------ ---------#include <linux/iscsi_ibft.h>
--- ------ ---------#include <linux/kexec.h>
          +         #include <linux/cpufreq.h>
          +         #include <linux/dma-mapping.h>
          +         #include <linux/ctype.h>
          +         #include <linux/uaccess.h>
          +         
                    #include <linux/percpu.h>
          -         #include <asm/smp.h>
          -         #include <asm/percpu.h>
          +         #include <linux/crash_dump.h>
          +         
          +         #include <video/edid.h>
          +         
          +         #include <asm/mtrr.h>
          +         #include <asm/apic.h>
          +         #include <asm/e820.h>
          +         #include <asm/mpspec.h>
          +         #include <asm/setup.h>
          +         #include <asm/arch_hooks.h>
          +         #include <asm/efi.h>
                    #include <asm/sections.h>
          +         #include <asm/dmi.h>
          +         #include <asm/io_apic.h>
          +         #include <asm/ist.h>
          +         #include <asm/vmi.h>
          +         #include <setup_arch.h>
          +         #include <asm/bios_ebda.h>
          +         #include <asm/cacheflush.h>
                    #include <asm/processor.h>
          -         #include <asm/setup.h>
          +         #include <asm/bugs.h>
          +         
          +         #include <asm/system.h>
          +         #include <asm/vsyscall.h>
          +         #include <asm/smp.h>
          +         #include <asm/desc.h>
          +         #include <asm/dma.h>
------- -- ---------#include <asm/gart.h>
+++++++ ++++++++++++#include <asm/iommu.h>
          +         #include <asm/mmu_context.h>
          +         #include <asm/proto.h>
          +         
          +         #include <mach_apic.h>
          +         #include <asm/paravirt.h>
          +         
          +         #include <asm/percpu.h>
--- ------ ---------#include <asm/sections.h>
                    #include <asm/topology.h>
          -         #include <asm/mpspec.h>
                    #include <asm/apicdef.h>
          +         #ifdef CONFIG_X86_64
          +         #include <asm/numa_64.h>
          +         #endif
                    
          -         #ifdef CONFIG_X86_LOCAL_APIC
          -         unsigned int num_processors;
          -         unsigned disabled_cpus __cpuinitdata;
          -         /* Processor that is doing the boot up */
          -         unsigned int boot_cpu_physical_apicid = -1U;
          -         EXPORT_SYMBOL(boot_cpu_physical_apicid);
          +         #ifndef ARCH_SETUP
          +         #define ARCH_SETUP
          +         #endif
                    
          -         DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
          -         EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
          +         #ifndef CONFIG_DEBUG_BOOT_PARAMS
          +         struct boot_params __initdata boot_params;
          +         #else
          +         struct boot_params boot_params;
          +         #endif
                    
          -         /* Bitmask of physically existing CPUs */
          -         physid_mask_t phys_cpu_present_map;
          +         /*
          +          * Machine setup.
          +          *
          +          * Resource descriptors for the kernel's data, code and bss regions.
          +          * All three are flagged as busy memory.  start and end are zero in
          +          * these initializers (presumably filled in later during boot --
          +          * TODO confirm against setup_arch).
          +          */
          +         static struct resource data_resource = {
          +             .name   = "Kernel data",
          +             .start  = 0,
          +             .end    = 0,
          +             .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
          +         };
          +         
          +         static struct resource code_resource = {
          +             .name   = "Kernel code",
          +             .start  = 0,
          +             .end    = 0,
          +             .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
          +         };
          +         
          +         static struct resource bss_resource = {
          +             .name   = "Kernel bss",
          +             .start  = 0,
          +             .end    = 0,
          +             .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
          +         };
          +         
          +         
          +         #ifdef CONFIG_X86_32
          +         /* This value is set up by the early boot code to point to the value
          +            immediately after the boot time page tables.  It contains a *physical*
          +            address, and must not be in the .bss segment! */
          +         unsigned long init_pg_tables_start __initdata = ~0UL;
          +         unsigned long init_pg_tables_end __initdata = ~0UL;
          +         
          +         static struct resource video_ram_resource = {
          +             .name   = "Video RAM area",
          +             .start  = 0xa0000,
          +             .end    = 0xbffff,
          +             .flags  = IORESOURCE_BUSY | IORESOURCE_MEM
          +         };
          +         
          +         /* cpu data as detected by the assembly code in head.S */
          +         struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
          +         /* common cpu data for all cpus */
          +         struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
          +         EXPORT_SYMBOL(boot_cpu_data);
          +         static void set_mca_bus(int x)
          +         {
          +         #ifdef CONFIG_MCA
          +             MCA_bus = x;    /* stored only with CONFIG_MCA; otherwise x is ignored */
          +         #endif /* CONFIG_MCA */
          +         }
          +         
          +         unsigned int def_to_bigsmp;
          +         
          +         /* for MCA, but anyone else can use it if they want */
          +         unsigned int machine_id;
          +         unsigned int machine_submodel_id;
          +         unsigned int BIOS_revision;
          +         
          +         struct apm_info apm_info;
          +         EXPORT_SYMBOL(apm_info);
          +         
          +         #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
          +             defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
          +         struct ist_info ist_info;
          +         EXPORT_SYMBOL(ist_info);
          +         #else
          +         struct ist_info ist_info;
                    #endif
                    
          -         #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
          +         #else
          +         struct cpuinfo_x86 boot_cpu_data __read_mostly;
          +         EXPORT_SYMBOL(boot_cpu_data);
          +         #endif
          +         
          +         
          +         #if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
          +         unsigned long mmu_cr4_features;
          +         #else
          +         unsigned long mmu_cr4_features = X86_CR4_PAE;
          +         #endif
          +         
          +         /* Boot loader ID as an integer, for the benefit of proc_dointvec */
          +         int bootloader_type;
          +         
                    /*
          -          * Copy data used in early init routines from the initial arrays to the
          -          * per cpu data areas.  These arrays then become expendable and the
          -          * *_early_ptr's are zeroed indicating that the static arrays are gone.
          +          * Early DMI memory
                     */
          -         static void __init setup_per_cpu_maps(void)
          +         int dmi_alloc_index;
          +         char dmi_alloc_data[DMI_MAX_DATA];
          +         
          +         /*
          +          * Setup options
          +          */
          +         struct screen_info screen_info;
          +         EXPORT_SYMBOL(screen_info);
          +         struct edid_info edid_info;
          +         EXPORT_SYMBOL_GPL(edid_info);
          +         
          +         extern int root_mountflags;
          +         
          +         unsigned long saved_video_mode;
          +         
          +         #define RAMDISK_IMAGE_START_MASK    0x07FF
          +         #define RAMDISK_PROMPT_FLAG         0x8000
          +         #define RAMDISK_LOAD_FLAG           0x4000
          +         
          +         static char __initdata command_line[COMMAND_LINE_SIZE];
          +         
          +         #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
          +         struct edd edd;
          +         #ifdef CONFIG_EDD_MODULE
          +         EXPORT_SYMBOL(edd);
          +         #endif
          +         /**
          +          * copy_edd() - Copy the BIOS EDD information
          +          *              from boot_params into a safe place.
          +          *
          +          * Copies the MBR signature buffer and the EDD info buffer, together
          +          * with their entry counts, from boot_params into the global 'edd'.
          +          * Both copies are sized by the destination arrays in 'edd'.
          +          */
          +         static inline void copy_edd(void)
          +         {
          +              memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
          +                 sizeof(edd.mbr_signature));
          +              memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
          +              edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
          +              edd.edd_info_nr = boot_params.eddbuf_entries;
          +         }
          +         #else /* neither CONFIG_EDD nor CONFIG_EDD_MODULE: nothing to copy */
          +         static inline void copy_edd(void)
          +         {
          +         }
          +         #endif /* CONFIG_EDD || CONFIG_EDD_MODULE */
          +         
          +         #ifdef CONFIG_BLK_DEV_INITRD
          +         
          +         #ifdef CONFIG_X86_32
          +         /*
          +          * relocate_initrd - move an initrd that does not lie entirely in low
          +          * memory.
          +          *
          +          * A page-aligned area of ramdisk_size bytes is looked up below
          +          * end_of_lowmem (panic if none is found), reserved, and published
          +          * through initrd_start/initrd_end.  The image is then copied there:
          +          * the part already below end_of_lowmem through __va(), the remainder
          +          * through temporary early_ioremap() mappings of at most MAX_MAP_CHUNK
          +          * bytes each.
          +          */
          +         #define MAX_MAP_CHUNK       (NR_FIX_BTMAPS << PAGE_SHIFT)
          +         static void __init relocate_initrd(void)
                    {
          -             int cpu;
                    
          -             for_each_possible_cpu(cpu) {
          -                     per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
          -                     per_cpu(x86_bios_cpu_apicid, cpu) =
          -                                                     x86_bios_cpu_apicid_init[cpu];
          -         #ifdef CONFIG_NUMA
          -                     per_cpu(x86_cpu_to_node_map, cpu) =
          -                                                     x86_cpu_to_node_map_init[cpu];
          +             u64 ramdisk_image = boot_params.hdr.ramdisk_image;
          +             u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
          +             u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
          +             u64 ramdisk_here;
          +             unsigned long slop, clen, mapaddr;
          +             char *p, *q;
          +         
          +             /* We need to move the initrd down into lowmem */
          +             ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
          +                                              PAGE_SIZE);
          +         
          +             if (ramdisk_here == -1ULL)
          +                     panic("Cannot find place for new RAMDISK of size %lld\n",
          +                              ramdisk_size);
          +         
          +             /* Note: this includes all the lowmem currently occupied by
          +                the initrd, we rely on that fact to keep the data intact. */
          +             reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
          +                              "NEW RAMDISK");
          +             initrd_start = ramdisk_here + PAGE_OFFSET;
          +             initrd_end   = initrd_start + ramdisk_size;
          +             printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
          +                              ramdisk_here, ramdisk_here + ramdisk_size);
          +         
          +             q = (char *)initrd_start;
          +         
          +             /* Copy any lowmem portion of the initrd */
          +             if (ramdisk_image < end_of_lowmem) {
          +                     clen = end_of_lowmem - ramdisk_image;
          +                     p = (char *)__va(ramdisk_image);
          +                     memcpy(q, p, clen);
          +                     q += clen;
          +                     ramdisk_image += clen;
          +                     ramdisk_size  -= clen;
          +             }
          +         
          +             /*
          +              * Copy the highmem portion of the initrd, one temporary mapping
          +              * at a time.  Each mapping starts on a page boundary; slop is
          +              * the offset of the data within that first page, and
          +              * clen + slop never exceeds MAX_MAP_CHUNK.
          +              */
          +             while (ramdisk_size) {
          +                     slop = ramdisk_image & ~PAGE_MASK;
          +                     clen = ramdisk_size;
          +                     if (clen > MAX_MAP_CHUNK-slop)
          +                             clen = MAX_MAP_CHUNK-slop;
          +                     mapaddr = ramdisk_image & PAGE_MASK;
          +                     p = early_ioremap(mapaddr, clen+slop);
          +                     memcpy(q, p+slop, clen);
          +                     early_iounmap(p, clen+slop);
          +                     q += clen;
          +                     ramdisk_image += clen;
          +                     ramdisk_size  -= clen;
          +             }
          +             /*
          +              * High pages are not converted by early_res_to_bootmem.
          +              * Reload the original location and size (both were modified by
          +              * the copy steps above) for the message below.
          +              */
          +             ramdisk_image = boot_params.hdr.ramdisk_image;
          +             ramdisk_size  = boot_params.hdr.ramdisk_size;
          +             printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to"
          +                     " %08llx - %08llx\n",
          +                     ramdisk_image, ramdisk_image + ramdisk_size - 1,
          +                     ramdisk_here, ramdisk_here + ramdisk_size - 1);
          +         }
                    #endif
          +         /*
          +          * reserve_initrd - set up initrd_start/initrd_end for the initrd
          +          * described by boot_params.hdr.
          +          *
          +          * Does nothing if no loader type, image address or image size was
          +          * supplied.  An image at least half the size of low memory is released
          +          * with free_early() and left disabled (initrd_start stays 0).  An image
          +          * that ends within low memory is used in place.  Otherwise the image
          +          * is relocated when CONFIG_X86_32 is set, or rejected when it is not,
          +          * and in both cases its original range is released with free_early().
          +          */
          +         static void __init reserve_initrd(void)
          +         {
          +             u64 ramdisk_image = boot_params.hdr.ramdisk_image;
          +             u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
          +             u64 ramdisk_end   = ramdisk_image + ramdisk_size;
          +             u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
          +         
          +             if (!boot_params.hdr.type_of_loader ||
          +                 !ramdisk_image || !ramdisk_size)
          +                     return;         /* No initrd provided by bootloader */
          +         
          +             initrd_start = 0;
          +         
          +             if (ramdisk_size >= (end_of_lowmem>>1)) {
          +                     free_early(ramdisk_image, ramdisk_end);
          +                     printk(KERN_ERR "initrd too large to handle, "
          +                            "disabling initrd\n");
          +                     return;
          +             }
          +         
          +             printk(KERN_INFO "RAMDISK: %08llx - %08llx\n", ramdisk_image,
          +                             ramdisk_end);
          +         
          +         
          +             if (ramdisk_end <= end_of_lowmem) {
          +                     /* All in lowmem, easy case */
          +                     /*
          +                      * don't need to reserve again, already reserved early
          +                      * in i386_start_kernel
          +                      */
          +                     initrd_start = ramdisk_image + PAGE_OFFSET;
          +                     initrd_end = initrd_start + ramdisk_size;
          +                     return;
                        }
                    
          -             /* indicate the early static arrays will soon be gone */
          -             x86_cpu_to_apicid_early_ptr = NULL;
          -             x86_bios_cpu_apicid_early_ptr = NULL;
          -         #ifdef CONFIG_NUMA
          -             x86_cpu_to_node_map_early_ptr = NULL;
          +         #ifdef CONFIG_X86_32
          +             relocate_initrd();
          +         #else
          +             printk(KERN_ERR "initrd extends beyond end of memory "
          +                    "(0x%08llx > 0x%08llx)\ndisabling initrd\n",
          +                    ramdisk_end, end_of_lowmem);
          +             initrd_start = 0;
                    #endif
          +             free_early(ramdisk_image, ramdisk_end);
                    }
          +         #else /* !CONFIG_BLK_DEV_INITRD: no initrd to set up */
          +         static void __init reserve_initrd(void)
          +         {
          +         }
          +         #endif /* CONFIG_BLK_DEV_INITRD */
          +         /*
          +          * parse_setup_data - walk the setup_data list passed in boot_params.
          +          *
          +          * Skipped when boot_params.hdr.version is below 0x0209.  Each node is
          +          * reached through its physical address: it is mapped with
          +          * early_ioremap(), SETUP_E820_EXT nodes are handed to parse_e820_ext()
          +          * and all other types are ignored, then the next pointer is read
          +          * before the mapping is dropped.  A next value of 0 ends the walk.
          +          */
          +         static void __init parse_setup_data(void)
          +         {
          +             struct setup_data *data;
          +             u64 pa_data;
          +         
          +             if (boot_params.hdr.version < 0x0209)
          +                     return;
          +             pa_data = boot_params.hdr.setup_data;
          +             while (pa_data) {
          +                     data = early_ioremap(pa_data, PAGE_SIZE);
          +                     switch (data->type) {
          +                     case SETUP_E820_EXT:
          +                             parse_e820_ext(data, pa_data);
          +                             break;
          +                     default:
          +                             break;
          +                     }
          +                     pa_data = data->next;
          +                     early_iounmap(data, PAGE_SIZE);
          +             }
          +         }
          +         /*
          +          * e820_reserve_setup_data - update the e820 map for the memory that
          +          * holds the setup_data nodes.
          +          *
          +          * Skipped when boot_params.hdr.version is below 0x0209.  For every
          +          * node, the range covering its header plus data->len payload bytes is
          +          * passed to e820_update_range() with E820_RAM and E820_RESERVED_KERN
          +          * (presumably the old and new type -- TODO confirm).  If at least one
          +          * node was found, the map is sanitized, copied to e820_saved and
          +          * printed.
          +          */
          +         static void __init e820_reserve_setup_data(void)
          +         {
          +             struct setup_data *data;
          +             u64 pa_data;
          +             int found = 0;
          +         
          +             if (boot_params.hdr.version < 0x0209)
          +                     return;
          +             pa_data = boot_params.hdr.setup_data;
          +             while (pa_data) {
          +                     data = early_ioremap(pa_data, sizeof(*data));
          +                     e820_update_range(pa_data, sizeof(*data)+data->len,
          +                              E820_RAM, E820_RESERVED_KERN);
          +                     found = 1;
          +                     pa_data = data->next;
          +                     early_iounmap(data, sizeof(*data));
          +             }
          +             if (!found)
          +                     return;
                    
          -         #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
          -         cpumask_t *cpumask_of_cpu_map __read_mostly;
          -         EXPORT_SYMBOL(cpumask_of_cpu_map);
          +             sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
          +             memcpy(&e820_saved, &e820, sizeof(struct e820map));
          +             printk(KERN_INFO "extended physical RAM map:\n");
          +             e820_print_map("reserve setup_data");
          +         }
                    
          -         /* requires nr_cpu_ids to be initialized */
          -         static void __init setup_cpumask_of_cpu(void)
          +         static void __init reserve_early_setup_data(void)
                    {
          -             int i;
          +             struct setup_data *data;
          +             u64 pa_data;
          +             char buf[32];
          +             /*
          +              * Reserve, via reserve_early(), the header plus data->len bytes
          +              * of every setup_data node, each labelled "setup data <type>"
          +              * with the type printed in hex.  Nothing is done when
          +              * boot_params.hdr.version is below 0x0209.
          +              */
          +             if (boot_params.hdr.version < 0x0209)
          +                     return;
          +             pa_data = boot_params.hdr.setup_data;
          +             while (pa_data) {
          +                     data = early_ioremap(pa_data, sizeof(*data));
          +                     sprintf(buf, "setup data %x", data->type);
          +                     reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
          +                     pa_data = data->next;
          +                     early_iounmap(data, sizeof(*data));
          +             }
          +         }
          +         
          +         /*
          +          * --------- Crashkernel reservation ------------------------------
          +          */
          +         
          +         #ifdef CONFIG_KEXEC
          +         
          +         /**
          +          * find_and_reserve_crashkernel - Reserve @size bytes of crashkernel
          +          * memory at any suitable offset.
          +          *
          +          * @size: Size of the crashkernel memory to reserve.
          +          *
          +          * Candidate areas are taken from find_e820_area() with 16MB alignment,
          +          * starting the search at address 0.  If the exclusive bootmem
          +          * reservation of a candidate fails, the search resumes one alignment
          +          * step above that candidate.
          +          *
          +          * Returns the base address on success, and -1ULL on failure.
          +          */
          +         unsigned long long find_and_reserve_crashkernel(unsigned long long size)
          +         {
          +             const unsigned long long alignment = 16<<20;    /* 16M */
          +             unsigned long long start = 0LL;
          +         
          +             while (1) {
          +                     int ret;
          +         
          +                     start = find_e820_area(start, ULONG_MAX, size, alignment);
          +                     if (start == -1ULL)
          +                             return start;
          +         
          +                     /* try to reserve it */
          +                     ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
          +                     if (ret >= 0)
          +                             return start;
                    
          -             /* alloc_bootmem zeroes memory */
          -             cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
          -             for (i = 0; i < nr_cpu_ids; i++)
          -                     cpu_set(i, cpumask_of_cpu_map[i]);
          +                     start += alignment;     /* reservation failed: look further up */
          +             }
          +         }
          +         /*
          +          * get_total_mem - size in bytes of the page frame range
          +          * [min_low_pfn, max_low_pfn), plus [highstart_pfn, highend_pfn) when
          +          * CONFIG_HIGHMEM is set.
          +          */
          +         static inline unsigned long long get_total_mem(void)
          +         {
          +             unsigned long long total;
          +         
          +             total = max_low_pfn - min_low_pfn;
          +         #ifdef CONFIG_HIGHMEM
          +             total += highend_pfn - highstart_pfn;
          +         #endif
          +         
          +             return total << PAGE_SHIFT;
          +         }
          +         /*
          +          * reserve_crashkernel - reserve the crash kernel memory requested on
          +          * the boot command line.
          +          *
          +          * parse_crashkernel() extracts the size and base from
          +          * boot_command_line.  With a base of 0 a suitable area is searched for
          +          * and reserved; otherwise exactly the requested range is reserved
          +          * exclusively.  If the reservation fails a message is logged and
          +          * nothing is registered.  On success the range is stored in crashk_res
          +          * and inserted into iomem_resource.
          +          */
          +         static void __init reserve_crashkernel(void)
          +         {
          +             unsigned long long total_mem;
          +             unsigned long long crash_size, crash_base;
          +             int ret;
          +         
          +             total_mem = get_total_mem();
          +         
          +             ret = parse_crashkernel(boot_command_line, total_mem,
          +                             &crash_size, &crash_base);
          +             if (ret != 0 || crash_size <= 0)
          +                     return;
          +         
          +             /* 0 means: find the address automatically */
          +             if (crash_base <= 0) {
          +                     crash_base = find_and_reserve_crashkernel(crash_size);
          +                     if (crash_base == -1ULL) {
          +                             pr_info("crashkernel reservation failed. "
          +                                     "No suitable area found.\n");
          +                             return;
          +                     }
          +             } else {
          +                     ret = reserve_bootmem_generic(crash_base, crash_size,
          +                                             BOOTMEM_EXCLUSIVE);
          +                     if (ret < 0) {
          +                             pr_info("crashkernel reservation failed - "
          +                                     "memory is in use\n");
          +                             return;
          +                     }
          +             }
          +         
          +             printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
          +                             "for crashkernel (System RAM: %ldMB)\n",
          +                             (unsigned long)(crash_size >> 20),
          +                             (unsigned long)(crash_base >> 20),
          +                             (unsigned long)(total_mem >> 20));
          +         
          +             crashk_res.start = crash_base;
          +             crashk_res.end   = crash_base + crash_size - 1;
          +             insert_resource(&iomem_resource, &crashk_res);
                    }
                    #else /* !CONFIG_KEXEC: nothing to reserve */
          -         static inline void setup_cpumask_of_cpu(void) { }
          +         static void __init reserve_crashkernel(void)
          +         {
          +         }
                    #endif
                    
          -         #ifdef CONFIG_X86_32
          -         /*
          -          * Great future not-so-futuristic plan: make i386 and x86_64 do it
          -          * the same way
          +         static struct resource standard_io_resources[] = {      /* fixed port ranges, all busy I/O */
          +             { .name = "dma1", .start = 0x00, .end = 0x1f,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "pic1", .start = 0x20, .end = 0x21,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "timer0", .start = 0x40, .end = 0x43,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "timer1", .start = 0x50, .end = 0x53,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "keyboard", .start = 0x60, .end = 0x60,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "keyboard", .start = 0x64, .end = 0x64,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "dma page reg", .start = 0x80, .end = 0x8f,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "pic2", .start = 0xa0, .end = 0xa1,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "dma2", .start = 0xc0, .end = 0xdf,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO },
          +             { .name = "fpu", .start = 0xf0, .end = 0xff,
          +                     .flags = IORESOURCE_BUSY | IORESOURCE_IO }
          +         };
          +         /*
          +          * reserve_standard_io_resources - request every entry of
          +          * standard_io_resources[] under ioport_resource.  The result of the
          +          * individual request_resource() calls is not checked.
          +          */
          +         static void __init reserve_standard_io_resources(void)
          +         {
          +             int i;
          +         
          +             /* request I/O space for devices used on all i[345]86 PCs */
          +             for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
          +                     request_resource(&ioport_resource, &standard_io_resources[i]);
          +         
          +         }
          +         
          +         #ifdef CONFIG_PROC_VMCORE
          +         /* elfcorehdr= specifies the location of elf core header
          +          * stored by the crashed kernel. This option will be passed
          +          * by kexec loader to the capture kernel.
                     */
          -         unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
          -         EXPORT_SYMBOL(__per_cpu_offset);
          +         static int __init setup_elfcorehdr(char *arg)
          +         {
          +             char *end;
          +             if (!arg)
          +                     return -EINVAL;
          +             elfcorehdr_addr = memparse(arg, &end);
          +             return end > arg ? 0 : -EINVAL;
          +         }
          +         early_param("elfcorehdr", setup_elfcorehdr);
                    #endif
                    
+++ ++++++++++++++++static struct x86_quirks default_x86_quirks __initdata;
+++ ++++++++++++++++
+++ ++++++++++++++++struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
+++ ++++++++++++++++
          +         /*
          +          * Determine if we were loaded by an EFI loader.  If so, then we have also been
          +          * passed the efi memmap, systab, etc., so we should use these data structures
          +          * for initialization.  Note, the efi init code path is determined by the
          +          * global efi_enabled. This allows the same kernel image to be used on existing
          +          * systems (with a traditional BIOS) as well as on EFI systems.
          +          */
                    /*
          -          * Great future plan:
          -          * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
          -          * Always point %gs to its beginning
          +          * setup_arch - architecture-specific boot-time initializations
          +          *
          +          * Note: On x86_64, fixmaps are ready for use even before this is called.
                     */
          -         void __init setup_per_cpu_areas(void)
          +         
          +         void __init setup_arch(char **cmdline_p)
                    {
          -             int i, highest_cpu = 0;
          -             unsigned long size;
          +         #ifdef CONFIG_X86_32
          +             memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
       +  +             visws_early_detect();
          +             pre_setup_arch_hook();
          +             early_cpu_init();
          +         #else
          +             printk(KERN_INFO "Command line: %s\n", boot_command_line);
          +         #endif
                    
          -         #ifdef CONFIG_HOTPLUG_CPU
          -             prefill_possible_map();
          +             early_ioremap_init();
          +         
          +             ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
          +             screen_info = boot_params.screen_info;
          +             edid_info = boot_params.edid_info;
          +         #ifdef CONFIG_X86_32
          +             apm_info.bios = boot_params.apm_bios_info;
          +             ist_info = boot_params.ist_info;
          +             if (boot_params.sys_desc_table.length != 0) {
          +                     set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
          +                     machine_id = boot_params.sys_desc_table.table[0];
          +                     machine_submodel_id = boot_params.sys_desc_table.table[1];
          +                     BIOS_revision = boot_params.sys_desc_table.table[2];
          +             }
          +         #endif
          +             saved_video_mode = boot_params.hdr.vid_mode;
          +             bootloader_type = boot_params.hdr.type_of_loader;
          +         
          +         #ifdef CONFIG_BLK_DEV_RAM
          +             rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
          +             rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
          +             rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
          +         #endif
          +         #ifdef CONFIG_EFI
          +             if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
          +         #ifdef CONFIG_X86_32
          +                          "EL32",
          +         #else
          +                          "EL64",
                    #endif
          +              4)) {
          +                     efi_enabled = 1;
          +                     efi_reserve_early();
          +             }
          +         #endif
          +         
          +             ARCH_SETUP
          +         
          +             setup_memory_map();
          +             parse_setup_data();
          +             /* update the e820_saved too */
          +             e820_reserve_setup_data();
                    
          -             /* Copy section for each CPU (we discard the original) */
          -             size = PERCPU_ENOUGH_ROOM;
          -             printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
          -                               size);
          +             copy_edd();
                    
          -             for_each_possible_cpu(i) {
          -                     char *ptr;
          -         #ifndef CONFIG_NEED_MULTIPLE_NODES
          -                     ptr = alloc_bootmem_pages(size);
          +             if (!boot_params.hdr.root_flags)
          +                     root_mountflags &= ~MS_RDONLY;
          +             init_mm.start_code = (unsigned long) _text;
          +             init_mm.end_code = (unsigned long) _etext;
          +             init_mm.end_data = (unsigned long) _edata;
          +         #ifdef CONFIG_X86_32
          +             init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
                    #else
          -                     int node = early_cpu_to_node(i);
          -                     if (!node_online(node) || !NODE_DATA(node)) {
          -                             ptr = alloc_bootmem_pages(size);
          -                             printk(KERN_INFO
          -                                    "cpu %d has no node or node-local memory\n", i);
          -                     }
          -                     else
          -                             ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
          +             init_mm.brk = (unsigned long) &_end;
                    #endif
          -                     if (!ptr)
          -                             panic("Cannot allocate cpu data for CPU %d\n", i);
          +         
          +             code_resource.start = virt_to_phys(_text);
          +             code_resource.end = virt_to_phys(_etext)-1;
          +             data_resource.start = virt_to_phys(_etext);
          +             data_resource.end = virt_to_phys(_edata)-1;
          +             bss_resource.start = virt_to_phys(&__bss_start);
          +             bss_resource.end = virt_to_phys(&__bss_stop)-1;
          +         
                    #ifdef CONFIG_X86_64
          -                     cpu_pda(i)->data_offset = ptr - __per_cpu_start;
          +             early_cpu_init();
          +         #endif
          +             strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
          +             *cmdline_p = command_line;
          +         
          +             parse_early_param();
          +         
          +             /* after early param, so could get panic from serial */
          +             reserve_early_setup_data();
          +         
          +             if (acpi_mps_check()) {
          +         #ifdef CONFIG_X86_LOCAL_APIC
          +                     disable_apic = 1;
          +         #endif
          +                     clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
          +             }
          +         
      +++ + ++  +   #ifdef CONFIG_PCI
      +++ + ++  +       if (pci_early_dump_regs)
      +++ + ++  +               early_dump_pci_devices();
      +++ + ++  +   #endif
      +++ + ++  +   
          +             finish_e820_parsing();
          +         
          +         #ifdef CONFIG_X86_32
          +             probe_roms();
          +         #endif
          +         
          +             /* after parse_early_param, so could debug it */
          +             insert_resource(&iomem_resource, &code_resource);
          +             insert_resource(&iomem_resource, &data_resource);
          +             insert_resource(&iomem_resource, &bss_resource);
          +         
          +             if (efi_enabled)
          +                     efi_init();
          +         
          +         #ifdef CONFIG_X86_32
          +             if (ppro_with_ram_bug()) {
          +                     e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
          +                                       E820_RESERVED);
          +                     sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
          +                     printk(KERN_INFO "fixed physical RAM map:\n");
          +                     e820_print_map("bad_ppro");
          +             }
          +         #else
          +             early_gart_iommu_check();
          +         #endif
          +         
          +             /*
          +              * partially used pages are not usable - thus
          +              * we are rounding upwards:
          +              */
          +             max_pfn = e820_end_of_ram_pfn();
          +         
          +             /* preallocate 4k for mptable mpc */
          +             early_reserve_e820_mpc_new();
          +             /* update e820 for memory not covered by WB MTRRs */
          +             mtrr_bp_init();
          +             if (mtrr_trim_uncached_memory(max_pfn))
          +                     max_pfn = e820_end_of_ram_pfn();
          +         
          +         #ifdef CONFIG_X86_32
          +             /* max_low_pfn gets updated here */
          +             find_low_pfn_range();
                    #else
          -                     __per_cpu_offset[i] = ptr - __per_cpu_start;
          +             num_physpages = max_pfn;
          +         
          +             check_efer();
          +         
          +             /* How many end-of-memory variables you have, grandma! */
          +             /* need this before calling reserve_initrd */
          +             if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
          +                     max_low_pfn = e820_end_of_low_ram_pfn();
          +             else
          +                     max_low_pfn = max_pfn;
          +         
          +             high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
                    #endif
          -                     memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
                    
          -                     highest_cpu = i;
          +             /* max_pfn_mapped is updated here */
          +             max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
          +             max_pfn_mapped = max_low_pfn_mapped;
          +         
          +         #ifdef CONFIG_X86_64
          +             if (max_pfn > max_low_pfn) {
          +                     max_pfn_mapped = init_memory_mapping(1UL<<32,
          +                                                          max_pfn<<PAGE_SHIFT);
          +                     /* can we preserve max_low_pfn? */
          +                     max_low_pfn = max_pfn;
                        }
          +         #endif
                    
          -             nr_cpu_ids = highest_cpu + 1;
          -             printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
          +             /*
          +              * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
          +              */
                    
          -             /* Setup percpu data maps */
          -             setup_per_cpu_maps();
          +         #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
          +             if (init_ohci1394_dma_early)
          +                     init_ohci1394_dma_on_all_controllers();
          +         #endif
                    
          -             /* Setup cpumask_of_cpu map */
          -             setup_cpumask_of_cpu();
          -         }
          +             reserve_initrd();
          +         
          +         #ifdef CONFIG_X86_64
          +             vsmp_init();
          +         #endif
          +         
          +             dmi_scan_machine();
          +         
          +             io_delay_init();
          +         
          +             /*
          +              * Parse the ACPI tables for possible boot-time SMP configuration.
          +              */
          +             acpi_boot_table_init();
          +         
          +         #ifdef CONFIG_ACPI_NUMA
          +             /*
          +              * Parse SRAT to discover nodes.
          +              */
          +             acpi_numa_init();
          +         #endif
          +         
          +             initmem_init(0, max_pfn);
          +         
          +         #ifdef CONFIG_X86_64
          +             dma32_reserve_bootmem();
          +         #endif
                    
          +         #ifdef CONFIG_ACPI_SLEEP
          +             /*
          +              * Reserve low memory region for sleep support.
          +              */
          +             acpi_reserve_bootmem();
                    #endif
--- --- -- ----- ---#ifdef CONFIG_X86_NUMAQ
--- --- -- ----- ---    /*
--- --- -- ----- ---     * need to check online nodes num, call it
--- --- -- ----- ---     * here before time_init/tsc_init
--- --- -- ----- ---     */
--- --- -- ----- ---    numaq_tsc_disable();
--- --- -- ----- ---#endif
--- --- -- ----- ---
          +         #ifdef CONFIG_X86_FIND_SMP_CONFIG
          +             /*
          +              * Find and reserve possible boot-time SMP configuration:
          +              */
          +             find_smp_config();
          +         #endif
          +             reserve_crashkernel();
          +         
          +             reserve_ibft_region();
          +         
          +         #ifdef CONFIG_KVM_CLOCK
          +             kvmclock_init();
          +         #endif
          +         
          +         #if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
          +             /*
          +              * Must be after max_low_pfn is determined, and before kernel
          +              * pagetables are setup.
          +              */
          +             vmi_init();
          +         #endif
          +         
+++++++++++++++++++     paravirt_pagetable_setup_start(swapper_pg_dir);
          +             paging_init();
+++++++++++++++++++     paravirt_pagetable_setup_done(swapper_pg_dir);
+++++++++++++++++++     paravirt_post_allocator_init();
          +         
          +         #ifdef CONFIG_X86_64
          +             map_vsyscall();
          +         #endif
          +         
          +         #ifdef CONFIG_X86_GENERICARCH
          +             generic_apic_probe();
          +         #endif
          +         
          +             early_quirks();
          +         
          +             /*
          +              * Read APIC and some other early information from ACPI tables.
          +              */
          +             acpi_boot_init();
          +         
          +         #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
          +             /*
          +              * get boot-time SMP configuration:
          +              */
          +             if (smp_found_config)
          +                     get_smp_config();
          +         #endif
          +         
          +             prefill_possible_map();
          +         #ifdef CONFIG_X86_64
          +             init_cpu_to_node();
          +         #endif
          +         
          +             init_apic_mappings();
          +             ioapic_init_mappings();
          +         
          +         #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32)
          +             if (def_to_bigsmp)
          +                     printk(KERN_WARNING "More than 8 CPUs detected and "
          +                             "CONFIG_X86_PC cannot handle it.\nUse "
          +                             "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
          +         #endif
          +             kvm_guest_init();
          +         
          +             e820_reserve_resources();
          +             e820_mark_nosave_regions(max_low_pfn);
          +         
          +         #ifdef CONFIG_X86_32
          +             request_resource(&iomem_resource, &video_ram_resource);
          +         #endif
          +             reserve_standard_io_resources();
          +         
          +             e820_setup_gap();
          +         
          +         #ifdef CONFIG_VT
          +         #if defined(CONFIG_VGA_CONSOLE)
          +             if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
          +                     conswitchp = &vga_con;
          +         #elif defined(CONFIG_DUMMY_CONSOLE)
          +             conswitchp = &dummy_con;
          +         #endif
          +         #endif
          +         }
Simple merge
@@@@@@@@@@@@@@@@@@@@@ -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -345,19 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 -327,12 +327,12 @@@@@@@@@@@@@@@@@@@@@ static void __cpuinit start_secondary(v
                         * lock helps us to not include this cpu in a currently in progress
                         * smp_call_function().
                         */
      --  - -   -       lock_ipi_call_lock();
          -         #ifdef CONFIG_X86_64
          -             spin_lock(&vector_lock);
          -         
          -             /* Setup the per cpu irq handling data structures */
          -             __setup_vector_irq(smp_processor_id());
          -             /*
          -              * Allow the master to continue.
          -              */
          -             spin_unlock(&vector_lock);
      ++  + +   +       ipi_call_lock_irq();
          +         #ifdef CONFIG_X86_IO_APIC
          +             setup_vector_irq(smp_processor_id());
                    #endif
                        cpu_set(smp_processor_id(), cpu_online_map);
      --  - -   -       unlock_ipi_call_lock();
      ++  + +   +       ipi_call_unlock_irq();
                        per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
                    
                        setup_secondary_clock();
@@@@@@@@@@@@@@@@@@@@@ -762,45 -762,45 -751,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -832,6 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 -762,45 +751,45 @@@@@@@@@@@@@@@@@@@@@ static void __cpuinit do_fork_idle(stru
                        complete(&c_idle->done);
                    }
                    
---------- -------- static int __cpuinit get_local_pda(int cpu)
          +         #ifdef CONFIG_X86_64
          +         /*
          +          * Allocate node local memory for the AP pda.
          +          *
          +          * Must be called after the _cpu_pda pointer table is initialized.
          +          */
+++++++++++++++++++ int __cpuinit get_local_pda(int cpu)
          +         {
          +             struct x8664_pda *oldpda, *newpda;
          +             unsigned long size = sizeof(struct x8664_pda);
          +             int node = cpu_to_node(cpu);
          +         
          +             if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
          +                     return 0;
          +         
          +             oldpda = cpu_pda(cpu);
          +             newpda = kmalloc_node(size, GFP_ATOMIC, node);
          +             if (!newpda) {
          +                     printk(KERN_ERR "Could not allocate node local PDA "
          +                             "for CPU %d on node %d\n", cpu, node);
          +         
          +                     if (oldpda)
          +                             return 0;       /* have a usable pda */
          +                     else
          +                             return -1;
          +             }
          +         
          +             if (oldpda) {
          +                     memcpy(newpda, oldpda, size);
          +                     if (!after_bootmem)
          +                             free_bootmem((unsigned long)oldpda, size);
          +             }
          +         
          +             newpda->in_bootmem = 0;
          +             cpu_pda(cpu) = newpda;
          +             return 0;
          +         }
          +         #endif /* CONFIG_X86_64 */
          +         
                    static int __cpuinit do_boot_cpu(int apicid, int cpu)
                    /*
                     * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@@@@@@@@@@@@@@@@@@@@ -1311,8 -1311,8 -1300,8 -1311,8 -1311,8 -1311,8 -1311,8 -1311,8 -1311,8 -1311,8 -1372,10 -1311,8 -1311,8 -1311,8 -1311,8 -1311,8 -1311,8 -1311,8 -1311,8 -1311,8 +1300,8 @@@@@@@@@@@@@@@@@@@@@ static void __ref remove_cpu_from_maps(
                        cpu_clear(cpu, cpu_callout_map);
                        cpu_clear(cpu, cpu_callin_map);
                        /* was set by cpu_init() */
 -------------------    clear_bit(cpu, (unsigned long *)&cpu_initialized);
          -             clear_node_cpumask(cpu);
          -         #endif
 +++++++++++++++++++    cpu_clear(cpu, cpu_initialized);
          +             numa_remove_cpu(cpu);
                    }
                    
                    int __cpu_disable(void)
@@@@@@@@@@@@@@@@@@@@@ -840,36 -840,36 -840,36 -840,36 -840,36 -840,36 -840,36 -840,36 -840,36 -840,39 -529,7 -840,36 -840,36 -840,36 -840,36 -840,36 -840,36 -840,36 -840,36 -840,34 +840,37 @@@@@@@@@@@@@@@@@@@@@ unsigned long __init_refok init_memory_
                    
                        __flush_tlb_all();
                    
          +             if (!after_init_bootmem)
          +                     reserve_early(table_start << PAGE_SHIFT,
          +                                      table_end << PAGE_SHIFT, "PGTABLE");
          +         
+++++++++ ++++++++++    if (!after_init_bootmem)
+++++++++ ++++++++++            early_memtest(start, end);
+++++++++ ++++++++++
          +             return end >> PAGE_SHIFT;
          +         }
          +         
          +         
          +         /*
          +          * paging_init() sets up the page tables - note that the first 8MB are
          +          * already mapped by head.S.
          +          *
          +          * This routine also unmaps the page at virtual kernel address 0, so
          +          * that we can trap those pesky NULL-reference errors in the kernel.
          +          */
          +         void __init paging_init(void)
          +         {
          +             pagetable_init();
          +         
          +             __flush_tlb_all();
          +         
                        kmap_init();
---------- -------- 
---------- --------     paravirt_post_allocator_init();
          +         
          +             /*
          +              * NOTE: at this point the bootmem allocator is fully available.
          +              */
          +             sparse_init();
          +             zone_sizes_init();
                    }
                    
                    /*
@@@@@@@@@@@@@@@@@@@@@ -449,9 -449,9 -449,9 -449,9 -449,9 -449,9 -449,9 -449,9 -449,9 -449,9 -547,8 -449,9 -451,9 -449,9 -449,9 -449,9 -449,9 -449,9 -449,9 -449,9 +451,9 @@@@@@@@@@@@@@@@@@@@@ int phys_mem_access_prot_allowed(struc
                        if (retval < 0)
                                return 0;
                    
       -        -       if (((pfn <= max_low_pfn_mapped) ||
       -        -            (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
          -             if (pfn <= max_pfn_mapped &&
          -                     ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
       +  +     +       if (((pfn < max_low_pfn_mapped) ||
       +  +     +            (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
          +                 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
                                free_memtype(offset, offset + size);
                                printk(KERN_INFO
                                "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
@@@@@@@@@@@@@@@@@@@@@ -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -586,4 -489,3 -491,89 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 -489,3 +491,89 @@@@@@@@@@@@@@@@@@@@@ void unmap_devmem(unsigned long pfn, un
                    
                        free_memtype(addr, addr + size);
                    }
++++++++++ + +++++++
++++++++++++ +++++++#if defined(CONFIG_DEBUG_FS)
++++++++++++ +++++++
++++++++++++ +++++++/* get Nth element of the linked list */
++++++++++++ +++++++static struct memtype *memtype_get_idx(loff_t pos)
++++++++++++ +++++++{
++++++++++++ +++++++    struct memtype *list_node, *print_entry;
++++++++++++ +++++++    int i = 1;
++++++++++++ +++++++
++++++++++++ +++++++    print_entry  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
++++++++++++ +++++++    if (!print_entry)
++++++++++++ +++++++            return NULL;
++++++++++++ +++++++
++++++++++++ +++++++    spin_lock(&memtype_lock);
++++++++++++ +++++++    list_for_each_entry(list_node, &memtype_list, nd) {
++++++++++++ +++++++            if (pos == i) {
++++++++++++ +++++++                    *print_entry = *list_node;
++++++++++++ +++++++                    spin_unlock(&memtype_lock);
++++++++++++ +++++++                    return print_entry;
++++++++++++ +++++++            }
++++++++++++ +++++++            ++i;
++++++++++++ +++++++    }
++++++++++++ +++++++    spin_unlock(&memtype_lock);
++++++++++++ +++++++    kfree(print_entry);
++++++++++++ +++++++    return NULL;
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
++++++++++++ +++++++{
++++++++++++ +++++++    if (*pos == 0) {
++++++++++++ +++++++            ++*pos;
++++++++++++ +++++++            seq_printf(seq, "PAT memtype list:\n");
++++++++++++ +++++++    }
++++++++++++ +++++++
++++++++++++ +++++++    return memtype_get_idx(*pos);
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++++++++++++ +++++++{
++++++++++++ +++++++    ++*pos;
++++++++++++ +++++++    return memtype_get_idx(*pos);
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static void memtype_seq_stop(struct seq_file *seq, void *v)
++++++++++++ +++++++{
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static int memtype_seq_show(struct seq_file *seq, void *v)
++++++++++++ +++++++{
++++++++++++ +++++++    struct memtype *print_entry = (struct memtype *)v;
++++++++++++ +++++++
++++++++++++ +++++++    seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
++++++++++++ +++++++                    print_entry->start, print_entry->end);
++++++++++++ +++++++    kfree(print_entry);
++++++++++++ +++++++    return 0;
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static struct seq_operations memtype_seq_ops = {
++++++++++++ +++++++    .start = memtype_seq_start,
++++++++++++ +++++++    .next  = memtype_seq_next,
++++++++++++ +++++++    .stop  = memtype_seq_stop,
++++++++++++ +++++++    .show  = memtype_seq_show,
++++++++++++ +++++++};
++++++++++++ +++++++
++++++++++++ +++++++static int memtype_seq_open(struct inode *inode, struct file *file)
++++++++++++ +++++++{
++++++++++++ +++++++    return seq_open(file, &memtype_seq_ops);
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++static const struct file_operations memtype_fops = {
++++++++++++ +++++++    .open    = memtype_seq_open,
++++++++++++ +++++++    .read    = seq_read,
++++++++++++ +++++++    .llseek  = seq_lseek,
++++++++++++ +++++++    .release = seq_release,
++++++++++++ +++++++};
++++++++++++ +++++++
++++++++++++ +++++++static int __init pat_memtype_list_init(void)
++++++++++++ +++++++{
++++++++++++ +++++++    debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
++++++++++++ +++++++                            NULL, &memtype_fops);
++++++++++++ +++++++    return 0;
++++++++++++ +++++++}
++++++++++++ +++++++
++++++++++++ +++++++late_initcall(pat_memtype_list_init);
++++++++++++ +++++++
++++++++++++ +++++++#endif /* CONFIG_DEBUG_FS */
                    #define PCI_CAN_SKIP_ISA_ALIGN      0x8000
                    #define PCI_USE__CRS                0x10000
                    #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000
          +         #define PCI_HAS_IO_ECS              0x40000
      +++ + ++  +   #define PCI_NOASSIGN_ROMS   0x80000
                    
                    extern unsigned int pci_probe;
                    extern unsigned long pirq_table_addr;
@@@@@@@@@@@@@@@@@@@@@ -102,14 -102,14 -102,14 -102,14 -102,14 -102,14 -101,14 -101,13 -101,14 -102,14 -102,6 -102,14 -101,14 -101,14 -102,14 -102,14 -101,15 -102,14 -102,14 -102,14 +102,15 @@@@@@@@@@@@@@@@@@@@@ extern int pci_direct_probe(void)
                    extern void pci_direct_init(int type);
                    extern void pci_pcbios_init(void);
                    extern int pci_olpc_init(void);
------- -- ----- ---extern int __init pci_numa_init(void);
          +         extern void __init dmi_check_pciprobe(void);
          +         extern void __init dmi_check_skip_isa_align(void);
          +         
          +         /* some commonly used subsys_initcalls */
          +         extern int __init pci_acpi_init(void);
          +         extern int __init pcibios_irq_init(void);
++++++++++++++++ +++extern int __init pci_visws_init(void);
++++++++++++++++ +++extern int __init pci_numaq_init(void);
          +         extern int __init pcibios_init(void);
                    
                    /* pci-mmconfig.c */
                    
@@@@@@@@@@@@@@@@@@@@@ -137,41 -137,41 -137,41 -137,41 -137,41 -137,41 -137,41 -137,41 -137,41 -137,41 -136,11 -137,41 -137,41 -137,41 -137,41 -137,41 -137,41 -137,41 -137,45 -150,41 +150,45 @@@@@@@@@@@@@@@@@@@@@ static void xen_vcpu_setup(int cpu
                        }
                    }
                    
          +         /*
          +          * On restore, set the vcpu placement up again.
          +          * If it fails, then we're in a bad state, since
          +          * we can't back out from using it...
          +          */
          +         void xen_vcpu_restore(void)
          +         {
          +             if (have_vcpu_info_placement) {
          +                     int cpu;
          +         
          +                     for_each_online_cpu(cpu) {
          +                             bool other_cpu = (cpu != smp_processor_id());
          +         
          +                             if (other_cpu &&
          +                                 HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
          +                                     BUG();
          +         
          +                             xen_vcpu_setup(cpu);
          +         
          +                             if (other_cpu &&
          +                                 HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
          +                                     BUG();
          +                     }
          +         
          +                     BUG_ON(!have_vcpu_info_placement);
          +             }
          +         }
          +         
                    static void __init xen_banner(void)
                    {
++++++++++++++++++ +    unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
++++++++++++++++++ +    struct xen_extraversion extra;
++++++++++++++++++ +    HYPERVISOR_xen_version(XENVER_extraversion, &extra);
++++++++++++++++++ +
                        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
                               pv_info.name);
---------- ------- -    printk(KERN_INFO "Hypervisor signature: %s%s\n",
---------- ------- -           xen_start_info->magic,
          -             printk(KERN_INFO "Hypervisor signature: %s\n", xen_start_info->magic);
++++++++++++++++++ +    printk(KERN_INFO "Xen version: %d.%d%s%s\n",
++++++++++++++++++ +           version >> 16, version & 0xffff, extra.extraversion,
          +                    xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
                    }
                    
                    static void xen_cpuid(unsigned int *ax, unsigned int *bx,
@@@@@@@@@@@@@@@@@@@@@ -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -784,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -841,68 -845,68 -979,16 +983,16 @@@@@@@@@@@@@@@@@@@@@ static __init void xen_set_pte_init(pte
                    
                    static __init void xen_pagetable_setup_start(pgd_t *base)
                    {
-------------------     pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
-------------------     int i;
------------------- 
-------------------     /* special set_pte for pagetable initialization */
-------------------     pv_mmu_ops.set_pte = xen_set_pte_init;
------------------- 
-------------------     init_mm.pgd = base;
-------------------     /*
-------------------      * copy top-level of Xen-supplied pagetable into place.  This
-------------------      * is a stand-in while we copy the pmd pages.
-------------------      */
-------------------     memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
------------------- 
-------------------     /*
-------------------      * For PAE, need to allocate new pmds, rather than
-------------------      * share Xen's, since Xen doesn't like pmd's being
-------------------      * shared between address spaces.
-------------------      */
-------------------     for (i = 0; i < PTRS_PER_PGD; i++) {
-------------------             if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-------------------                     pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
------------------- 
-------------------                     memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-------------------                            PAGE_SIZE);
------------------- 
-------------------                     make_lowmem_page_readonly(pmd);
------------------- 
-------------------                     set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-------------------             } else
-------------------                     pgd_clear(&base[i]);
-------------------     }
------------------- 
-------------------     /* make sure zero_page is mapped RO so we can use it in pagetables */
-------------------     make_lowmem_page_readonly(empty_zero_page);
-------------------     make_lowmem_page_readonly(base);
-------------------     /*
-------------------      * Switch to new pagetable.  This is done before
-------------------      * pagetable_init has done anything so that the new pages
-------------------      * added to the table can be prepared properly for Xen.
-------------------      */
-------------------     xen_write_cr3(__pa(base));
------------------- 
-------------------     /* Unpin initial Xen pagetable */
-------------------     pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
-------------------                       PFN_DOWN(__pa(xen_start_info->pt_base)));
                    }
                    
          -         static __init void setup_shared_info(void)
          +         void xen_setup_shared_info(void)
                    {
                        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-------------------             unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
------------------- 
-------------------             /*
-------------------              * Create a mapping for the shared info page.
-------------------              * Should be set_fixmap(), but shared_info is a machine
-------------------              * address with no corresponding pseudo-phys address.
-------------------              */
-------------------             set_pte_mfn(addr,
-------------------                         PFN_DOWN(xen_start_info->shared_info),
-------------------                         PAGE_KERNEL);
------------------- 
-------------------             HYPERVISOR_shared_info = (struct shared_info *)addr;
+++++++++++++++++++             set_fixmap(FIX_PARAVIRT_BOOTMAP,
+++++++++++++++++++                        xen_start_info->shared_info);
+++++++++++++++++++ 
+++++++++++++++++++             HYPERVISOR_shared_info =
+++++++++++++++++++                     (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
                        } else
                                HYPERVISOR_shared_info =
                                        (struct shared_info *)__va(xen_start_info->shared_info);
                    
                    static __init void xen_pagetable_setup_done(pgd_t *base)
                    {
---------- --------     /* This will work as long as patching hasn't happened yet
---------- --------        (which it hasn't) */
---------- --------     pv_mmu_ops.alloc_pte = xen_alloc_pte;
---------- --------     pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
---------- --------     pv_mmu_ops.release_pte = xen_release_pte;
---------- --------     pv_mmu_ops.release_pmd = xen_release_pmd;
---------- --------     pv_mmu_ops.set_pte = xen_set_pte;
---------- -------- 
          +             xen_setup_shared_info();
---------- -------- 
---------- --------     /* Actually pin the pagetable down, but we can't set PG_pinned
---------- --------        yet because the page structures don't exist yet. */
---------- --------     pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
          +         }
          +         
          +         static __init void xen_post_allocator_init(void)
          +         {
+++++++++++++++++++     pv_mmu_ops.set_pte = xen_set_pte;
          +             pv_mmu_ops.set_pmd = xen_set_pmd;
          +             pv_mmu_ops.set_pud = xen_set_pud;
+++++++++++++++++++ #if PAGETABLE_LEVELS == 4
+++++++++++++++++++     pv_mmu_ops.set_pgd = xen_set_pgd;
+++++++++++++++++++ #endif
+++++++++++++++++++ 
++++++++++ ++++++++     /* This will work as long as patching hasn't happened yet
++++++++++ ++++++++        (which it hasn't) */
++++++++++ ++++++++     pv_mmu_ops.alloc_pte = xen_alloc_pte;
++++++++++ ++++++++     pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
++++++++++ ++++++++     pv_mmu_ops.release_pte = xen_release_pte;
++++++++++ ++++++++     pv_mmu_ops.release_pmd = xen_release_pmd;
          -             pv_mmu_ops.set_pte = xen_set_pte;
          -         
          -             setup_shared_info();
+++++++++++++++++++ #if PAGETABLE_LEVELS == 4
+++++++++++++++++++     pv_mmu_ops.alloc_pud = xen_alloc_pud;
+++++++++++++++++++     pv_mmu_ops.release_pud = xen_release_pud;
+++++++++++++++++++ #endif
                    
          -             /* Actually pin the pagetable down, but we can't set PG_pinned
          -                yet because the page structures don't exist yet. */
          -             pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     SetPagePinned(virt_to_page(level3_user_vsyscall));
+++++++++++++++++++ #endif
          +             xen_mark_init_mm_pinned();
                    }
                    
                    /* This is called once we have the cpu_possible_map */
                        return ret;
                    }
                    
          +         static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot)
          +         {
          +             pte_t pte;
          +         
          +             phys >>= PAGE_SHIFT;
          +         
          +             switch (idx) {
          +             case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
          +         #ifdef CONFIG_X86_F00F_BUG
          +             case FIX_F00F_IDT:
          +         #endif
+++++++++++++++++++ #ifdef CONFIG_X86_32
          +             case FIX_WP_TEST:
          +             case FIX_VDSO:
+++++++++++++++++++ # ifdef CONFIG_HIGHMEM
+++++++++++++++++++     case FIX_KMAP_BEGIN ... FIX_KMAP_END:
+++++++++++++++++++ # endif
+++++++++++++++++++ #else
+++++++++++++++++++     case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+++++++++++++++++++ #endif
          +         #ifdef CONFIG_X86_LOCAL_APIC
          +             case FIX_APIC_BASE:     /* maps dummy local APIC */
          +         #endif
          +                     pte = pfn_pte(phys, prot);
          +                     break;
          +         
          +             default:
          +                     pte = mfn_pte(phys, prot);
          +                     break;
          +             }
          +         
          +             __native_set_fixmap(idx, pte);
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     /* Replicate changes to map the vsyscall page into the user
+++++++++++++++++++        pagetable vsyscall mapping. */
+++++++++++++++++++     if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+++++++++++++++++++             unsigned long vaddr = __fix_to_virt(idx);
+++++++++++++++++++             set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
+++++++++++++++++++     }
+++++++++++++++++++ #endif
          +         }
          +         
                    static const struct pv_info xen_info __initdata = {
                        .paravirt_enabled = 1,
                        .shared_kernel_pmd = 0,
@@@@@@@@@@@@@@@@@@@@@ -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -995,7 -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -1089,7 -1093,7 -1201,11 +1205,11 @@@@@@@@@@@@@@@@@@@@@ static const struct pv_cpu_ops xen_cpu_
                        .read_pmc = native_read_pmc,
                    
                        .iret = xen_iret,
          -             .irq_enable_syscall_ret = xen_sysexit,
          +             .irq_enable_sysexit = xen_sysexit,
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     .usergs_sysret32 = xen_sysret32,
+++++++++++++++++++     .usergs_sysret64 = xen_sysret64,
+++++++++++++++++++ #endif
                    
                        .load_tr_desc = paravirt_nop,
                        .set_ldt = xen_set_ldt,
@@@@@@@@@@@@@@@@@@@@@ -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1029,6 -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1123,9 -1127,9 -1262,9 +1266,9 @@@@@@@@@@@@@@@@@@@@@ static const struct pv_irq_ops xen_irq_
                        .irq_enable = xen_irq_enable,
                        .safe_halt = xen_safe_halt,
                        .halt = xen_halt,
---------- --------     .adjust_exception_frame = paravirt_nop,
          +         #ifdef CONFIG_X86_64
+++++++++++++++++++     .adjust_exception_frame = xen_adjust_exception_frame,
          +         #endif
                    };
                    
                    static const struct pv_apic_ops xen_apic_ops __initdata = {
@@@@@@@@@@@@@@@@@@@@@ -1157,9 -1157,9 -1156,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1060,6 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1157,9 -1161,9 -1296,9 +1299,9 @@@@@@@@@@@@@@@@@@@@@ static const struct pv_mmu_ops xen_mmu_
                        .pte_update = paravirt_nop,
                        .pte_update_defer = paravirt_nop,
                    
---------- --------     .pgd_alloc = __paravirt_pgd_alloc,
---------- --------     .pgd_free = paravirt_nop,
+++++++++++++++++++     .pgd_alloc = xen_pgd_alloc,
+++++++++++++++++++     .pgd_free = xen_pgd_free,
          +         
                        .alloc_pte = xen_alloc_pte_init,
                        .release_pte = xen_release_pte_init,
                        .alloc_pmd = xen_alloc_pte_init,
                        .kmap_atomic_pte = xen_kmap_atomic_pte,
                    #endif
                    
-------------------     .set_pte = NULL,        /* see xen_pagetable_setup_* */
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     .set_pte = xen_set_pte,
+++++++++++++++++++ #else
+++++++++++++++++++     .set_pte = xen_set_pte_init,
+++++++++++++++++++ #endif
                        .set_pte_at = xen_set_pte_at,
          -             .set_pmd = xen_set_pmd,
          +             .set_pmd = xen_set_pmd_hyper,
          +         
          +             .ptep_modify_prot_start = __ptep_modify_prot_start,
          +             .ptep_modify_prot_commit = __ptep_modify_prot_commit,
                    
                        .pte_val = xen_pte_val,
          +             .pte_flags = native_pte_val,
                        .pgd_val = xen_pgd_val,
                    
                        .make_pte = xen_make_pte,
                        .make_pgd = xen_make_pgd,
                    
+++++++++++++++++++ #ifdef CONFIG_X86_PAE
                        .set_pte_atomic = xen_set_pte_atomic,
                        .set_pte_present = xen_set_pte_at,
---------- --------     .set_pud = xen_set_pud_hyper,
          -             .set_pud = xen_set_pud,
                        .pte_clear = xen_pte_clear,
                        .pmd_clear = xen_pmd_clear,
+++++++++++++++++++ #endif      /* CONFIG_X86_PAE */
+++++++++++++++++++     .set_pud = xen_set_pud_hyper,
                    
                        .make_pmd = xen_make_pmd,
                        .pmd_val = xen_pmd_val,
                                .enter = paravirt_enter_lazy_mmu,
                                .leave = xen_leave_lazy,
                        },
          -         };
                    
          -         #ifdef CONFIG_SMP
          -         static const struct smp_ops xen_smp_ops __initdata = {
          -             .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
          -             .smp_prepare_cpus = xen_smp_prepare_cpus,
          -             .cpu_up = xen_cpu_up,
          -             .smp_cpus_done = xen_smp_cpus_done,
          -         
          -             .smp_send_stop = xen_smp_send_stop,
          -             .smp_send_reschedule = xen_smp_send_reschedule,
          -             .smp_call_function_mask = xen_smp_call_function_mask,
          +             .set_fixmap = xen_set_fixmap,
                    };
          -         #endif      /* CONFIG_SMP */
                    
---------- -------- #ifdef CONFIG_SMP
---------- -------- static const struct smp_ops xen_smp_ops __initdata = {
---------- --------     .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
---------- --------     .smp_prepare_cpus = xen_smp_prepare_cpus,
---------- --------     .cpu_up = xen_cpu_up,
---------- --------     .smp_cpus_done = xen_smp_cpus_done,
---------- -------- 
---------- --------     .smp_send_stop = xen_smp_send_stop,
---------- --------     .smp_send_reschedule = xen_smp_send_reschedule,
------  -- - --- -- 
------  -- - --- --     .send_call_func_ipi = xen_smp_send_call_function_ipi,
------  -- - --- --     .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
      --    -   -       .smp_call_function_mask = xen_smp_call_function_mask,
---------- -------- };
---------- -------- #endif      /* CONFIG_SMP */
---------- -------- 
                    static void xen_reboot(int reason)
                    {
          +             struct sched_shutdown r = { .reason = reason };
          +         
                    #ifdef CONFIG_SMP
                        smp_send_stop();
                    #endif
@@@@@@@@@@@@@@@@@@@@@ -1271,8 -1271,8 -1270,8 -1271,8 -1271,8 -1271,8 -1269,8 -1269,8 -1271,8 -1271,8 -1161,8 -1271,8 -1269,8 -1271,8 -1271,8 -1271,8 -1269,8 -1271,8 -1275,8 -1411,248 +1414,248 @@@@@@@@@@@@@@@@@@@@@ static void __init xen_reserve_top(void
                                top = pp.virt_start;
                    
                        reserve_top_address(-top + 2 * PAGE_SIZE);
+++++++++++++++++++ #endif      /* CONFIG_X86_32 */
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ /*
+++++++++++++++++++  * Like __va(), but returns address in the kernel mapping (which is
+++++++++++++++++++  * all we have until the physical memory mapping has been set up).
+++++++++++++++++++  */
+++++++++++++++++++ static void *__ka(phys_addr_t paddr)
+++++++++++++++++++ {
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     return (void *)(paddr + __START_KERNEL_map);
+++++++++++++++++++ #else
+++++++++++++++++++     return __va(paddr);
+++++++++++++++++++ #endif
      ++  + +   + + }
      ++  + +   + + 
+++++++++++++++++++ /* Convert a machine address to physical address */
+++++++++++++++++++ static unsigned long m2p(phys_addr_t maddr)
+++++++++++++++++++ {
+++++++++++++++++++     phys_addr_t paddr;
+++++++++++++++++++ 
+++++++++++++++++++     maddr &= PTE_MASK;
+++++++++++++++++++     paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
+++++++++++++++++++ 
+++++++++++++++++++     return paddr;
++++++++++ ++++++++ }
++++++++++ ++++++++ 
+++++++++++++++++++ /* Convert a machine address to kernel virtual */
+++++++++++++++++++ static void *m2v(phys_addr_t maddr)
+++++++++++++++++++ {
+++++++++++++++++++     return __ka(m2p(maddr));
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++ static void walk(pgd_t *pgd, unsigned long addr)
+++++++++++++++++++ {
+++++++++++++++++++     unsigned l4idx = pgd_index(addr);
+++++++++++++++++++     unsigned l3idx = pud_index(addr);
+++++++++++++++++++     unsigned l2idx = pmd_index(addr);
+++++++++++++++++++     unsigned l1idx = pte_index(addr);
+++++++++++++++++++     pgd_t l4;
+++++++++++++++++++     pud_t l3;
+++++++++++++++++++     pmd_t l2;
+++++++++++++++++++     pte_t l1;
+++++++++++++++++++ 
+++++++++++++++++++     xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
+++++++++++++++++++                    pgd, addr, l4idx, l3idx, l2idx, l1idx);
+++++++++++++++++++ 
+++++++++++++++++++     l4 = pgd[l4idx];
+++++++++++++++++++     xen_raw_printk("  l4: %016lx\n", l4.pgd);
+++++++++++++++++++     xen_raw_printk("      %016lx\n", pgd_val(l4));
+++++++++++++++++++ 
+++++++++++++++++++     l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
+++++++++++++++++++     xen_raw_printk("  l3: %016lx\n", l3.pud);
+++++++++++++++++++     xen_raw_printk("      %016lx\n", pud_val(l3));
+++++++++++++++++++ 
+++++++++++++++++++     l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
+++++++++++++++++++     xen_raw_printk("  l2: %016lx\n", l2.pmd);
+++++++++++++++++++     xen_raw_printk("      %016lx\n", pmd_val(l2));
+++++++++++++++++++ 
+++++++++++++++++++     l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
+++++++++++++++++++     xen_raw_printk("  l1: %016lx\n", l1.pte);
+++++++++++++++++++     xen_raw_printk("      %016lx\n", pte_val(l1));
+++++++++++++++++++ }
+++++++++++++++++++ #endif
+++++++++++++++++++ 
+++++++++++++++++++ static void set_page_prot(void *addr, pgprot_t prot)
+++++++++++++++++++ {
+++++++++++++++++++     unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
+++++++++++++++++++     pte_t pte = pfn_pte(pfn, prot);
+++++++++++++++++++ 
+++++++++++++++++++     xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
+++++++++++++++++++                    addr, pfn, get_phys_to_machine(pfn),
+++++++++++++++++++                    pgprot_val(prot), pte.pte);
+++++++++++++++++++ 
+++++++++++++++++++     if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+++++++++++++++++++             BUG();
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
+++++++++++++++++++ {
+++++++++++++++++++     unsigned pmdidx, pteidx;
+++++++++++++++++++     unsigned ident_pte;
+++++++++++++++++++     unsigned long pfn;
+++++++++++++++++++ 
+++++++++++++++++++     ident_pte = 0;
+++++++++++++++++++     pfn = 0;
+++++++++++++++++++     for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
+++++++++++++++++++             pte_t *pte_page;
+++++++++++++++++++ 
+++++++++++++++++++             /* Reuse or allocate a page of ptes */
+++++++++++++++++++             if (pmd_present(pmd[pmdidx]))
+++++++++++++++++++                     pte_page = m2v(pmd[pmdidx].pmd);
+++++++++++++++++++             else {
+++++++++++++++++++                     /* Check for free pte pages */
+++++++++++++++++++                     if (ident_pte == ARRAY_SIZE(level1_ident_pgt))
+++++++++++++++++++                             break;
+++++++++++++++++++ 
+++++++++++++++++++                     pte_page = &level1_ident_pgt[ident_pte];
+++++++++++++++++++                     ident_pte += PTRS_PER_PTE;
+++++++++++++++++++ 
+++++++++++++++++++                     pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
+++++++++++++++++++             }
+++++++++++++++++++ 
+++++++++++++++++++             /* Install mappings */
+++++++++++++++++++             for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
+++++++++++++++++++                     pte_t pte;
+++++++++++++++++++ 
+++++++++++++++++++                     if (pfn > max_pfn_mapped)
+++++++++++++++++++                             max_pfn_mapped = pfn;
+++++++++++++++++++ 
+++++++++++++++++++                     if (!pte_none(pte_page[pteidx]))
+++++++++++++++++++                             continue;
+++++++++++++++++++ 
+++++++++++++++++++                     pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
+++++++++++++++++++                     pte_page[pteidx] = pte;
+++++++++++++++++++             }
+++++++++++++++++++     }
+++++++++++++++++++ 
+++++++++++++++++++     for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
+++++++++++++++++++             set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
+++++++++++++++++++ 
+++++++++++++++++++     set_page_prot(pmd, PAGE_KERNEL_RO);
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++ static void convert_pfn_mfn(void *v)
+++++++++++++++++++ {
+++++++++++++++++++     pte_t *pte = v;
+++++++++++++++++++     int i;
+++++++++++++++++++ 
+++++++++++++++++++     /* All levels are converted the same way, so just treat them
+++++++++++++++++++        as ptes. */
+++++++++++++++++++     for(i = 0; i < PTRS_PER_PTE; i++)
+++++++++++++++++++             pte[i] = xen_make_pte(pte[i].pte);
+++++++++++++++++++ }
+++++++++++++++++++ 
+++++++++++++++++++ /*
+++++++++++++++++++  * Set up the initial kernel pagetable.
+++++++++++++++++++  *
+++++++++++++++++++  * We can construct this by grafting the Xen provided pagetable into
+++++++++++++++++++  * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
+++++++++++++++++++  * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt.  This
+++++++++++++++++++  * means that only the kernel has a physical mapping to start with -
+++++++++++++++++++  * but that's enough to get __va working.  We need to fill in the rest
+++++++++++++++++++  * of the physical mapping once some sort of allocator has been set
+++++++++++++++++++  * up.
+++++++++++++++++++  */
+++++++++++++++++++ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+++++++++++++++++++ {
+++++++++++++++++++     pud_t *l3;
+++++++++++++++++++     pmd_t *l2;
+++++++++++++++++++ 
+++++++++++++++++++     /* Zap identity mapping */
+++++++++++++++++++     init_level4_pgt[0] = __pgd(0);
+++++++++++++++++++ 
+++++++++++++++++++     /* Pre-constructed entries are in pfn, so convert to mfn */
+++++++++++++++++++     convert_pfn_mfn(init_level4_pgt);
+++++++++++++++++++     convert_pfn_mfn(level3_ident_pgt);
+++++++++++++++++++     convert_pfn_mfn(level3_kernel_pgt);
+++++++++++++++++++ 
+++++++++++++++++++     l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
+++++++++++++++++++     l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
+++++++++++++++++++ 
+++++++++++++++++++     memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+++++++++++++++++++     memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+++++++++++++++++++ 
+++++++++++++++++++     l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
+++++++++++++++++++     l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
+++++++++++++++++++     memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+++++++++++++++++++ 
+++++++++++++++++++     /* Set up identity map */
+++++++++++++++++++     xen_map_identity_early(level2_ident_pgt, max_pfn);
+++++++++++++++++++ 
+++++++++++++++++++     /* Make pagetable pieces RO */
+++++++++++++++++++     set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++ 
+++++++++++++++++++     /* Pin down new L4 */
+++++++++++++++++++     pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
+++++++++++++++++++                       PFN_DOWN(__pa_symbol(init_level4_pgt)));
+++++++++++++++++++ 
+++++++++++++++++++     /* Unpin Xen-provided one */
+++++++++++++++++++     pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+++++++++++++++++++ 
+++++++++++++++++++     /* Switch over */
+++++++++++++++++++     pgd = init_level4_pgt;
+++++++++++++++++++ 
+++++++++++++++++++     /*
+++++++++++++++++++      * At this stage there can be no user pgd, and no page
+++++++++++++++++++      * structure to attach it to, so make sure we just set kernel
+++++++++++++++++++      * pgd.
+++++++++++++++++++      */
+++++++++++++++++++     xen_mc_batch();
+++++++++++++++++++     __xen_write_cr3(true, __pa(pgd));
+++++++++++++++++++     xen_mc_issue(PARAVIRT_LAZY_CPU);
+++++++++++++++++++ 
+++++++++++++++++++     reserve_early(__pa(xen_start_info->pt_base),
+++++++++++++++++++                   __pa(xen_start_info->pt_base +
+++++++++++++++++++                        xen_start_info->nr_pt_frames * PAGE_SIZE),
+++++++++++++++++++                   "XEN PAGETABLES");
+++++++++++++++++++ 
+++++++++++++++++++     return pgd;
+++++++++++++++++++ }
+++++++++++++++++++ #else       /* !CONFIG_X86_64 */
+++++++++++++++++++ static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss;
+++++++++++++++++++ 
+++++++++++++++++++ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
+++++++++++++++++++ {
+++++++++++++++++++     pmd_t *kernel_pmd;
+++++++++++++++++++ 
+++++++++++++++++++     init_pg_tables_start = __pa(pgd);
+++++++++++++++++++     init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
+++++++++++++++++++     max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+++++++++++++++++++ 
+++++++++++++++++++     kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
+++++++++++++++++++     memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+++++++++++++++++++ 
+++++++++++++++++++     xen_map_identity_early(level2_kernel_pgt, max_pfn);
+++++++++++++++++++ 
+++++++++++++++++++     memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+++++++++++++++++++     set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY],
+++++++++++++++++++                     __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT));
+++++++++++++++++++ 
+++++++++++++++++++     set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
+++++++++++++++++++     set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
+++++++++++++++++++ 
+++++++++++++++++++     pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
+++++++++++++++++++ 
+++++++++++++++++++     xen_write_cr3(__pa(swapper_pg_dir));
+++++++++++++++++++ 
+++++++++++++++++++     pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir)));
+++++++++++++++++++ 
+++++++++++++++++++     return swapper_pg_dir;
++++++  ++++ +++ +  }
+++++++++++++++++++ #endif      /* CONFIG_X86_64 */
++++++  ++++ +++ +  
                    /* First C function to be called on Xen boot */
                    asmlinkage void __init xen_start_kernel(void)
                    {
                        pv_apic_ops = xen_apic_ops;
                        pv_mmu_ops = xen_mmu_ops;
                    
          +             if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
          +                     pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
          +                     pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
          +             }
          +         
                        machine_ops = xen_machine_ops;
                    
------------------- #ifdef CONFIG_SMP
-------------------     smp_ops = xen_smp_ops;
+++++++++++++++++++ #ifdef CONFIG_X86_64
+++++++++++++++++++     /* Disable until direct per-cpu data access. */
+++++++++++++++++++     have_vcpu_info_placement = 0;
+++++++++++++++++++     x86_64_init_pda();
                    #endif
                    
          -             xen_setup_features();
+++++++++++++++++++     xen_smp_init();
++++++++++ ++++++++ 
                        /* Get mfn list */
                        if (!xen_feature(XENFEAT_auto_translated_physmap))
          -                     phys_to_machine_mapping = (unsigned long *)xen_start_info->mfn_list;
          +                     xen_build_dynamic_phys_to_machine();
                    
                        pgd = (pgd_t *)xen_start_info->pt_base;
                    
---------- --------     init_pg_tables_start = __pa(pgd);
-------------------     init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
---------- --------     max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT;
------------------- 
-------------------     init_mm.pgd = pgd; /* use the Xen pagetables to start */
------------------- 
-------------------     /* keep using Xen gdt for now; no urgent need to change it */
------------------- 
-------------------     x86_write_percpu(xen_cr3, __pa(pgd));
-------------------     x86_write_percpu(xen_current_cr3, __pa(pgd));
+++++++++++++++++++     /* Prevent unwanted bits from being set in PTEs. */
+++++++++++++++++++     __supported_pte_mask &= ~_PAGE_GLOBAL;
+++++++++++++++++++     if (!is_initial_xendomain())
+++++++++++++++++++             __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
                    
                        /* Don't do the full vcpu_info placement stuff until we have a
                           possible map and a non-dummy shared_info. */
                        boot_params.hdr.ramdisk_image = xen_start_info->mod_start
                                ? __pa(xen_start_info->mod_start) : 0;
                        boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+++++++++++++++++++     boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
                    
          -             if (!is_initial_xendomain())
          +             if (!is_initial_xendomain()) {
          +                     add_preferred_console("xenboot", 0, NULL);
          +                     add_preferred_console("tty", 0, NULL);
                                add_preferred_console("hvc", 0, NULL);
          +             }
          +         
+++++++++++++++++++     xen_raw_console_write("about to get started...\n");
+++++++++++++++++++ 
+++++++++++++++++++ #if 0
+++++++++++++++++++     xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
+++++++++++++++++++                    &boot_params, __pa_symbol(&boot_params),
+++++++++++++++++++                    __va(__pa_symbol(&boot_params)));
+++++++++++++++++++ 
+++++++++++++++++++     walk(pgd, &boot_params);
+++++++++++++++++++     walk(pgd, __va(__pa(&boot_params)));
+++++++++++++++++++ #endif
++++++++++ ++++++++ 
                        /* Start the world */
          -             start_kernel();
+++++++++++++++++++ #ifdef CONFIG_X86_32
          +             i386_start_kernel();
+++++++++++++++++++ #else
+++++++++++++++++++     x86_64_start_reservations((char *)__pa_symbol(&boot_params));
+++++++++++++++++++ #endif
                    }
Simple merge
                    
                    
                    #ifdef CONFIG_X86_64
------------------- #define PV_SAVE_REGS   pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx
------------------- #define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax
+++++++++++++++++++ #define PV_SAVE_REGS                                \
+++++++++++++++++++     push %rax;                              \
+++++++++++++++++++     push %rcx;                              \
+++++++++++++++++++     push %rdx;                              \
+++++++++++++++++++     push %rsi;                              \
+++++++++++++++++++     push %rdi;                              \
+++++++++++++++++++     push %r8;                               \
+++++++++++++++++++     push %r9;                               \
+++++++++++++++++++     push %r10;                              \
+++++++++++++++++++     push %r11
+++++++++++++++++++ #define PV_RESTORE_REGS                             \
+++++++++++++++++++     pop %r11;                               \
+++++++++++++++++++     pop %r10;                               \
+++++++++++++++++++     pop %r9;                                \
+++++++++++++++++++     pop %r8;                                \
+++++++++++++++++++     pop %rdi;                               \
+++++++++++++++++++     pop %rsi;                               \
+++++++++++++++++++     pop %rdx;                               \
+++++++++++++++++++     pop %rcx;                               \
+++++++++++++++++++     pop %rax
                    #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
                    #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
          +         #define PARA_INDIRECT(addr) *addr(%rip)
                    #else
                    #define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
                    #define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
                    /* Interrupt control for vSMPowered x86_64 systems */
                    void vsmp_init(void);
                    
          -         char *machine_specific_memory_setup(void);
       +  +         #ifdef CONFIG_X86_VISWS
       +  +         extern void visws_early_detect(void);
       +  +         extern int is_visws_box(void);
       +  +         #else
       +  +         static inline void visws_early_detect(void) { }
       +  +         static inline int is_visws_box(void) { return 0; }
       +  +         #endif
       +  +         
       +  +         /*
       +  +          * Any setup quirks to be performed?
       +  +          */
--- --- -- ---------extern int (*arch_time_init_quirk)(void);
--- --- -- ---------extern int (*arch_pre_intr_init_quirk)(void);
--- --- -- ---------extern int (*arch_intr_init_quirk)(void);
--- --- -- ---------extern int (*arch_trap_init_quirk)(void);
--- --- -- ---------extern char * (*arch_memory_setup_quirk)(void);
--- --- -- ---------extern int (*mach_get_smp_config_quirk)(unsigned int early);
--- --- -- ---------extern int (*mach_find_smp_config_quirk)(unsigned int reserve);
+++ ++++++++++++++++struct mpc_config_processor;
+++ ++++++++++++++++struct mpc_config_bus;
+++ ++++++++++++++++struct mp_config_oemtable;
+++ ++++++++++++++++struct x86_quirks {
+++ ++++++++++++++++    int (*arch_pre_time_init)(void);
+++ ++++++++++++++++    int (*arch_time_init)(void);
+++ ++++++++++++++++    int (*arch_pre_intr_init)(void);
+++ ++++++++++++++++    int (*arch_intr_init)(void);
+++ ++++++++++++++++    int (*arch_trap_init)(void);
+++ ++++++++++++++++    char * (*arch_memory_setup)(void);
+++ ++++++++++++++++    int (*mach_get_smp_config)(unsigned int early);
+++ ++++++++++++++++    int (*mach_find_smp_config)(unsigned int reserve);
+++ ++++++++++++++++
+++ ++++++++++++++++    int *mpc_record;
+++ ++++++++++++++++    int (*mpc_apic_id)(struct mpc_config_processor *m);
+++ ++++++++++++++++    void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name);
+++ ++++++++++++++++    void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
+++ ++++++++++++++++    void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
+++ ++++++++++++++++                                    unsigned short oemsize);
+++ ++++++++++++++++};
+++ ++++++++++++++++
+++ ++++++++++++++++extern struct x86_quirks *x86_quirks;
       +  +         
                    #ifndef CONFIG_PARAVIRT
                    #define paravirt_post_allocator_init()      do {} while (0)
                    #endif
@@@@@@@@@@@@@@@@@@@@@ -67,17 -67,17 -67,17 -82,17 -67,17 -67,17 -67,17 -48,17 -67,17 -67,17 -50,19 -67,17 -67,17 -67,17 -67,17 -67,17 -67,17 -67,17 -67,17 -67,18 +82,18 @@@@@@@@@@@@@@@@@@@@@ extern struct boot_params boot_params
                     */
                    #define LOWMEMSIZE()        (0x9f000)
                    
          -         struct e820entry;
          -         
          -         char * __init machine_specific_memory_setup(void);
          -         char *memory_setup(void);
          +         #ifdef __i386__
                    
          -         int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
          -         int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map);
          -         void __init add_memory_region(unsigned long long start,
          -                                   unsigned long long size, int type);
          +         void __init i386_start_kernel(void);
          +         extern void probe_roms(void);
                    
          +         extern unsigned long init_pg_tables_start;
                    extern unsigned long init_pg_tables_end;
                    
          -         
          +         #else
+++++++++++++++++++ void __init x86_64_init_pda(void);
          +         void __init x86_64_start_kernel(char *real_mode);
          +         void __init x86_64_start_reservations(char *real_mode_data);
                    
                    #endif /* __i386__ */
                    #endif /* _SETUP */