/*
* apicid, cpu, node mappings
*/
-s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
+s16 __apicid_to_node[MAX_LOCAL_APIC] = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
};
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-void __cpuinit numa_set_node(int cpu, int node)
+void numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
#endif
per_cpu(x86_cpu_to_node_map, cpu) = node;
- if (node != NUMA_NO_NODE)
- set_cpu_numa_node(cpu, node);
+ set_cpu_numa_node(cpu, node);
}
-void __cpuinit numa_clear_node(int cpu)
+void numa_clear_node(int cpu)
{
numa_set_node(cpu, NUMA_NO_NODE);
}
static void __init setup_node_data(int nid, u64 start, u64 end)
{
const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
- bool remapped = false;
u64 nd_pa;
void *nd;
int tnid;
if (end && (end - start) < NODE_MIN_SIZE)
return;
- /* initialize remap allocator before aligning to ZONE_ALIGN */
- init_alloc_remap(nid, start, end);
-
start = roundup(start, ZONE_ALIGN);
printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n",
nid, start, end - 1);
/*
- * Allocate node data. Try remap allocator first, node-local
- * memory and then any node. Never allocate in DMA zone.
+ * Allocate node data. Try node-local memory and then any node.
+ * Never allocate in DMA zone.
*/
- nd = alloc_remap(nid, nd_size);
- if (nd) {
- nd_pa = __pa(nd);
- remapped = true;
- } else {
- nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
- if (!nd_pa) {
- pr_err("Cannot find %zu bytes in node %d\n",
- nd_size, nid);
- return;
- }
- nd = __va(nd_pa);
+ nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+ if (!nd_pa) {
+ pr_err("Cannot find %zu bytes in any node\n", nd_size);
+ return;
}
+ nd = __va(nd_pa);
/* report and initialize */
- printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n",
- nd_pa, nd_pa + nd_size - 1, remapped ? " (remapped)" : "");
+ printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n",
+ nd_pa, nd_pa + nd_size - 1);
tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
- if (!remapped && tnid != nid)
+ if (tnid != nid)
printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid);
node_data[nid] = nd;
for (i = 0; i < MAX_LOCAL_APIC; i++)
set_apicid_to_node(i, NUMA_NO_NODE);
- nodes_clear(numa_nodes_parsed);
+ /*
+ * Do not clear numa_nodes_parsed or zero numa_meminfo here, because
+ * SRAT was parsed earlier in early_parse_srat().
+ */
nodes_clear(node_possible_map);
nodes_clear(node_online_map);
- memset(&numa_meminfo, 0, sizeof(numa_meminfo));
WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
numa_reset_distance();
static inline unsigned long highmap_start_pfn(void)
{
- return __pa(_text) >> PAGE_SHIFT;
+ return __pa_symbol(_text) >> PAGE_SHIFT;
}
static inline unsigned long highmap_end_pfn(void)
{
- return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
+ return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
}
#endif
* The .rodata section needs to be read-only. Using the pfn
* catches all aliases.
*/
- if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT,
- __pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
+ if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+ __pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
}
EXPORT_SYMBOL_GPL(lookup_address);
+/*
+ * This is necessary because __pa() does not work on some
+ * kinds of memory, like vmalloc() or the alloc_remap()
+ * areas on 32-bit NUMA systems. The percpu areas can
+ * end up in this kind of memory, for instance.
+ *
+ * This could be optimized, but it is only intended to be
+ * used at inititalization time, and keeping it
+ * unoptimized should increase the testing coverage for
+ * the more obscure platforms.
+ */
+phys_addr_t slow_virt_to_phys(void *__virt_addr)
+{
+ unsigned long virt_addr = (unsigned long)__virt_addr;
+ phys_addr_t phys_addr;
+ unsigned long offset;
+ enum pg_level level;
+ unsigned long psize;
+ unsigned long pmask;
+ pte_t *pte;
+
+ pte = lookup_address(virt_addr, &level);
+ BUG_ON(!pte);
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+ offset = virt_addr & ~pmask;
+ phys_addr = pte_pfn(*pte) << PAGE_SHIFT;
+ return (phys_addr | offset);
+}
+EXPORT_SYMBOL_GPL(slow_virt_to_phys);
+
/*
* Set the new pmd in all the pgds we know about:
*/
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot, req_prot;
int i, do_split = 1;
- unsigned int level;
+ enum pg_level level;
if (cpa->force_split)
return 1;
switch (level) {
case PG_LEVEL_2M:
- psize = PMD_PAGE_SIZE;
- pmask = PMD_PAGE_MASK;
- break;
#ifdef CONFIG_X86_64
case PG_LEVEL_1G:
- psize = PUD_PAGE_SIZE;
- pmask = PUD_PAGE_MASK;
- break;
#endif
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+ break;
default:
do_split = -EINVAL;
goto out_unlock;
pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
+ /*
+ * Set the PSE and GLOBAL flags only if the PRESENT flag is
+ * set otherwise pmd_present/pmd_huge will return true even on
+ * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL
+ * for the ancient hardware that doesn't support it.
+ */
+ if (pgprot_val(new_prot) & _PAGE_PRESENT)
+ pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
+ else
+ pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
+
+ new_prot = canon_pgprot(new_prot);
+
/*
* old_pte points to the large page base address. So we need
* to add the offset of the virtual address:
* The address is aligned and the number of pages
* covers the full page.
*/
- new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
+ new_pte = pfn_pte(pte_pfn(old_pte), new_prot);
__set_pmd_pte(kpte, address, new_pte);
cpa->flags |= CPA_FLUSHTLB;
do_split = 0;
return do_split;
}
-static int split_large_page(pte_t *kpte, unsigned long address)
+int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase)
{
unsigned long pfn, pfninc = 1;
unsigned int i, level;
- pte_t *pbase, *tmp;
+ pte_t *tmp;
pgprot_t ref_prot;
- struct page *base;
-
- if (!debug_pagealloc)
- spin_unlock(&cpa_lock);
- base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
- if (!debug_pagealloc)
- spin_lock(&cpa_lock);
- if (!base)
- return -ENOMEM;
+ struct page *base = virt_to_page(pbase);
spin_lock(&pgd_lock);
/*
* up for us already:
*/
tmp = lookup_address(address, &level);
- if (tmp != kpte)
- goto out_unlock;
+ if (tmp != kpte) {
+ spin_unlock(&pgd_lock);
+ return 1;
+ }
- pbase = (pte_t *)page_address(base);
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
/*
#ifdef CONFIG_X86_64
if (level == PG_LEVEL_1G) {
pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
- pgprot_val(ref_prot) |= _PAGE_PSE;
+ /*
+ * Set the PSE flags only if the PRESENT flag is set
+ * otherwise pmd_present/pmd_huge will return true
+ * even on a non present pmd.
+ */
+ if (pgprot_val(ref_prot) & _PAGE_PRESENT)
+ pgprot_val(ref_prot) |= _PAGE_PSE;
+ else
+ pgprot_val(ref_prot) &= ~_PAGE_PSE;
}
#endif
+ /*
+ * Set the GLOBAL flags only if the PRESENT flag is set
+ * otherwise pmd/pte_present will return true even on a non
+ * present pmd/pte. The canon_pgprot will clear _PAGE_GLOBAL
+ * for the ancient hardware that doesn't support it.
+ */
+ if (pgprot_val(ref_prot) & _PAGE_PRESENT)
+ pgprot_val(ref_prot) |= _PAGE_GLOBAL;
+ else
+ pgprot_val(ref_prot) &= ~_PAGE_GLOBAL;
+
/*
* Get the target pfn from the original entry:
*/
pfn = pte_pfn(*kpte);
for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
- set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
+ set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot)));
- if (address >= (unsigned long)__va(0) &&
- address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
- split_page_count(level);
-
-#ifdef CONFIG_X86_64
- if (address >= (unsigned long)__va(1UL<<32) &&
- address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
+ if (pfn_range_is_mapped(PFN_DOWN(__pa(address)),
+ PFN_DOWN(__pa(address)) + 1))
split_page_count(level);
-#endif
/*
* Install the new, split up pagetable.
* going on.
*/
__flush_tlb_all();
+ spin_unlock(&pgd_lock);
- base = NULL;
+ return 0;
+}
-out_unlock:
- /*
- * If we dropped out via the lookup_address check under
- * pgd_lock then stick the page back into the pool:
- */
- if (base)
+static int split_large_page(pte_t *kpte, unsigned long address)
+{
+ pte_t *pbase;
+ struct page *base;
+
+ if (!debug_pagealloc)
+ spin_unlock(&cpa_lock);
+ base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
+ if (!debug_pagealloc)
+ spin_lock(&cpa_lock);
+ if (!base)
+ return -ENOMEM;
+
+ pbase = (pte_t *)page_address(base);
+ if (__split_large_page(kpte, address, pbase))
__free_page(base);
- spin_unlock(&pgd_lock);
return 0;
}
new_prot = static_protections(new_prot, address, pfn);
+ /*
+ * Set the GLOBAL flags only if the PRESENT flag is
+ * set otherwise pte_present will return true even on
+ * a non present pte. The canon_pgprot will clear
+ * _PAGE_GLOBAL for the ancient hardware that doesn't
+ * support it.
+ */
+ if (pgprot_val(new_prot) & _PAGE_PRESENT)
+ pgprot_val(new_prot) |= _PAGE_GLOBAL;
+ else
+ pgprot_val(new_prot) &= ~_PAGE_GLOBAL;
+
/*
* We need to keep the pfn from the existing PTE,
* after all we're only going to change it's attributes
unsigned long vaddr;
int ret;
- if (cpa->pfn >= max_pfn_mapped)
+ if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1))
return 0;
-#ifdef CONFIG_X86_64
- if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
- return 0;
-#endif
/*
* No need to redo, when the primary call touched the direct
* mapping already:
}
EXPORT_SYMBOL(efi_enabled);
+ static bool disable_runtime = false;
static int __init setup_noefi(char *arg)
{
- clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
+ disable_runtime = true;
return 0;
}
early_param("noefi", setup_noefi);
* - Not within any part of the kernel
* - Not the bios reserved area
*/
- if ((start+size >= virt_to_phys(_text)
- && start <= virt_to_phys(_end)) ||
+ if ((start+size >= __pa_symbol(_text)
+ && start <= __pa_symbol(_end)) ||
!e820_all_mapped(start, start+size, E820_RAM) ||
memblock_is_region_reserved(start, size)) {
/* Could not reserve, skip it */
if (!efi_is_native())
pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
else {
- if (efi_runtime_init())
+ if (disable_runtime || efi_runtime_init())
return;
set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
}
efi_memory_desc_t *md, *prev_md = NULL;
efi_status_t status;
unsigned long size;
- u64 end, systab, end_pfn;
+ u64 end, systab, start_pfn, end_pfn;
void *p, *va, *new_memmap = NULL;
int count = 0;
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
+ start_pfn = PFN_DOWN(md->phys_addr);
end_pfn = PFN_UP(end);
- if (end_pfn <= max_low_pfn_mapped
- || (end_pfn > (1UL << (32 - PAGE_SHIFT))
- && end_pfn <= max_pfn_mapped)) {
+ if (pfn_range_is_mapped(start_pfn, end_pfn)) {
va = __va(md->phys_addr);
if (!(md->attribute & EFI_MEMORY_WB))