Merge tag 'v3.13-rc7' into x86/efi-kexec to resolve conflicts
[cascardo/linux.git] / arch / x86 / platform / efi / efi.c
index cceb813..d62ec87 100644 (file)
@@ -12,6 +12,8 @@
  *     Bibo Mao <bibo.mao@intel.com>
  *     Chandramouli Narayanan <mouli@linux.intel.com>
  *     Huang Ying <ying.huang@intel.com>
+ * Copyright (C) 2013 SuSE Labs
+ *     Borislav Petkov <bp@suse.de> - runtime services VA mapping
  *
  * Copied from efi_32.c to eliminate the duplicated code between EFI
  * 32/64 support code. --ying 2007-10-26
@@ -51,7 +53,7 @@
 #include <asm/x86_init.h>
 #include <asm/rtc.h>
 
-#define EFI_DEBUG      1
+#define EFI_DEBUG
 
 #define EFI_MIN_RESERVE 5120
 
@@ -74,6 +76,8 @@ static __initdata efi_config_table_type_t arch_tables[] = {
        {NULL_GUID, NULL, NULL},
 };
 
+u64 efi_setup;         /* efi setup_data physical address */
+
 /*
  * Returns 1 if 'facility' is enabled, 0 otherwise.
  */
@@ -110,7 +114,6 @@ static int __init setup_storage_paranoia(char *arg)
 }
 early_param("efi_no_storage_paranoia", setup_storage_paranoia);
 
-
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
        unsigned long flags;
@@ -398,9 +401,9 @@ int __init efi_memblock_x86_reserve_range(void)
        return 0;
 }
 
-#if EFI_DEBUG
 static void __init print_efi_memmap(void)
 {
+#ifdef EFI_DEBUG
        efi_memory_desc_t *md;
        void *p;
        int i;
@@ -415,8 +418,8 @@ static void __init print_efi_memmap(void)
                        md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
                        (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
        }
-}
 #endif  /*  EFI_DEBUG  */
+}
 
 void __init efi_reserve_boot_services(void)
 {
@@ -436,7 +439,7 @@ void __init efi_reserve_boot_services(void)
                 * - Not within any part of the kernel
                 * - Not the bios reserved area
                */
-               if ((start+size >= __pa_symbol(_text)
+               if ((start + size > __pa_symbol(_text)
                                && start <= __pa_symbol(_end)) ||
                        !e820_all_mapped(start, start+size, E820_RAM) ||
                        memblock_is_region_reserved(start, size)) {
@@ -489,18 +492,27 @@ static int __init efi_systab_init(void *phys)
 {
        if (efi_enabled(EFI_64BIT)) {
                efi_system_table_64_t *systab64;
+               struct efi_setup_data *data = NULL;
                u64 tmp = 0;
 
+               if (efi_setup) {
+                       data = early_memremap(efi_setup, sizeof(*data));
+                       if (!data)
+                               return -ENOMEM;
+               }
                systab64 = early_ioremap((unsigned long)phys,
                                         sizeof(*systab64));
                if (systab64 == NULL) {
                        pr_err("Couldn't map the system table!\n");
+                       if (data)
+                               early_iounmap(data, sizeof(*data));
                        return -ENOMEM;
                }
 
                efi_systab.hdr = systab64->hdr;
-               efi_systab.fw_vendor = systab64->fw_vendor;
-               tmp |= systab64->fw_vendor;
+               efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
+                                             systab64->fw_vendor;
+               tmp |= data ? data->fw_vendor : systab64->fw_vendor;
                efi_systab.fw_revision = systab64->fw_revision;
                efi_systab.con_in_handle = systab64->con_in_handle;
                tmp |= systab64->con_in_handle;
@@ -514,15 +526,20 @@ static int __init efi_systab_init(void *phys)
                tmp |= systab64->stderr_handle;
                efi_systab.stderr = systab64->stderr;
                tmp |= systab64->stderr;
-               efi_systab.runtime = (void *)(unsigned long)systab64->runtime;
-               tmp |= systab64->runtime;
+               efi_systab.runtime = data ?
+                                    (void *)(unsigned long)data->runtime :
+                                    (void *)(unsigned long)systab64->runtime;
+               tmp |= data ? data->runtime : systab64->runtime;
                efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
                tmp |= systab64->boottime;
                efi_systab.nr_tables = systab64->nr_tables;
-               efi_systab.tables = systab64->tables;
-               tmp |= systab64->tables;
+               efi_systab.tables = data ? (unsigned long)data->tables :
+                                          systab64->tables;
+               tmp |= data ? data->tables : systab64->tables;
 
                early_iounmap(systab64, sizeof(*systab64));
+               if (data)
+                       early_iounmap(data, sizeof(*data));
 #ifdef CONFIG_X86_32
                if (tmp >> 32) {
                        pr_err("EFI data located above 4GB, disabling EFI.\n");
@@ -626,6 +643,62 @@ static int __init efi_memmap_init(void)
        return 0;
 }
 
+/*
+ * A number of config table entries get remapped to virtual addresses
+ * after entering EFI virtual mode. However, the kexec kernel requires
+ * their physical addresses therefore we pass them via setup_data and
+ * correct those entries to their respective physical addresses here.
+ *
+ * Currently only handles smbios which is necessary for some firmware
+ * implementation.
+ */
+static int __init efi_reuse_config(u64 tables, int nr_tables)
+{
+       int i, sz, ret = 0;
+       void *p, *tablep;
+       struct efi_setup_data *data;
+
+       if (!efi_setup)
+               return 0;
+
+       if (!efi_enabled(EFI_64BIT))
+               return 0;
+
+       data = early_memremap(efi_setup, sizeof(*data));
+       if (!data) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       if (!data->smbios)
+               goto out_memremap;
+
+       sz = sizeof(efi_config_table_64_t);
+
+       p = tablep = early_memremap(tables, nr_tables * sz);
+       if (!p) {
+               pr_err("Could not map Configuration table!\n");
+               ret = -ENOMEM;
+               goto out_memremap;
+       }
+
+       for (i = 0; i < efi.systab->nr_tables; i++) {
+               efi_guid_t guid;
+
+               guid = ((efi_config_table_64_t *)p)->guid;
+
+               if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID))
+                       ((efi_config_table_64_t *)p)->table = data->smbios;
+               p += sz;
+       }
+       early_iounmap(tablep, nr_tables * sz);
+
+out_memremap:
+       early_iounmap(data, sizeof(*data));
+out:
+       return ret;
+}
+
 void __init efi_init(void)
 {
        efi_char16_t *c16;
@@ -651,6 +724,10 @@ void __init efi_init(void)
 
        set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
 
+       efi.config_table = (unsigned long)efi.systab->tables;
+       efi.fw_vendor    = (unsigned long)efi.systab->fw_vendor;
+       efi.runtime      = (unsigned long)efi.systab->runtime;
+
        /*
         * Show what we know for posterity
         */
@@ -667,6 +744,9 @@ void __init efi_init(void)
                efi.systab->hdr.revision >> 16,
                efi.systab->hdr.revision & 0xffff, vendor);
 
+       if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
+               return;
+
        if (efi_config_init(arch_tables))
                return;
 
@@ -684,15 +764,12 @@ void __init efi_init(void)
                        return;
                set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
        }
-
        if (efi_memmap_init())
                return;
 
        set_bit(EFI_MEMMAP, &x86_efi_facility);
 
-#if EFI_DEBUG
        print_efi_memmap();
-#endif
 }
 
 void __init efi_late_init(void)
@@ -741,36 +818,38 @@ void efi_memory_uc(u64 addr, unsigned long size)
        set_memory_uc(addr, npages);
 }
 
-/*
- * This function will switch the EFI runtime services to virtual mode.
- * Essentially, look through the EFI memmap and map every region that
- * has the runtime attribute bit set in its memory descriptor and update
- * that memory descriptor with the virtual address obtained from ioremap().
- * This enables the runtime services to be called without having to
- * thunk back into physical mode for every invocation.
- */
-void __init efi_enter_virtual_mode(void)
+void __init old_map_region(efi_memory_desc_t *md)
 {
-       efi_memory_desc_t *md, *prev_md = NULL;
-       efi_status_t status;
+       u64 start_pfn, end_pfn, end;
        unsigned long size;
-       u64 end, systab, start_pfn, end_pfn;
-       void *p, *va, *new_memmap = NULL;
-       int count = 0;
+       void *va;
 
-       efi.systab = NULL;
+       start_pfn = PFN_DOWN(md->phys_addr);
+       size      = md->num_pages << PAGE_SHIFT;
+       end       = md->phys_addr + size;
+       end_pfn   = PFN_UP(end);
 
-       /*
-        * We don't do virtual mode, since we don't do runtime services, on
-        * non-native EFI
-        */
+       if (pfn_range_is_mapped(start_pfn, end_pfn)) {
+               va = __va(md->phys_addr);
 
-       if (!efi_is_native()) {
-               efi_unmap_memmap();
-               return;
-       }
+               if (!(md->attribute & EFI_MEMORY_WB))
+                       efi_memory_uc((u64)(unsigned long)va, size);
+       } else
+               va = efi_ioremap(md->phys_addr, size,
+                                md->type, md->attribute);
+
+       md->virt_addr = (u64) (unsigned long) va;
+       if (!va)
+               pr_err("ioremap of 0x%llX failed!\n",
+                      (unsigned long long)md->phys_addr);
+}
+
+/* Merge contiguous regions of the same type and attribute */
+static void __init efi_merge_regions(void)
+{
+       void *p;
+       efi_memory_desc_t *md, *prev_md = NULL;
 
-       /* Merge contiguous regions of the same type and attribute */
        for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
                u64 prev_size;
                md = p;
@@ -796,6 +875,77 @@ void __init efi_enter_virtual_mode(void)
                }
                prev_md = md;
        }
+}
+
+static void __init get_systab_virt_addr(efi_memory_desc_t *md)
+{
+       unsigned long size;
+       u64 end, systab;
+
+       size = md->num_pages << EFI_PAGE_SHIFT;
+       end = md->phys_addr + size;
+       systab = (u64)(unsigned long)efi_phys.systab;
+       if (md->phys_addr <= systab && systab < end) {
+               systab += md->virt_addr - md->phys_addr;
+               efi.systab = (efi_system_table_t *)(unsigned long)systab;
+       }
+}
+
+static int __init save_runtime_map(void)
+{
+       efi_memory_desc_t *md;
+       void *tmp, *p, *q = NULL;
+       int count = 0;
+
+       for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+               md = p;
+
+               if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
+                   (md->type == EFI_BOOT_SERVICES_CODE) ||
+                   (md->type == EFI_BOOT_SERVICES_DATA))
+                       continue;
+               tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL);
+               if (!tmp)
+                       goto out;
+               q = tmp;
+
+               memcpy(q + count * memmap.desc_size, md, memmap.desc_size);
+               count++;
+       }
+
+       efi_runtime_map_setup(q, count, memmap.desc_size);
+
+       return 0;
+out:
+       kfree(q);
+       return -ENOMEM;
+}
+
+/*
+ * Map efi regions which were passed via setup_data. The virt_addr is a fixed
+ * addr which was used in first kernel of a kexec boot.
+ */
+static void __init efi_map_regions_fixed(void)
+{
+       void *p;
+       efi_memory_desc_t *md;
+
+       for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+               md = p;
+               efi_map_region_fixed(md); /* FIXME: add error handling */
+               get_systab_virt_addr(md);
+       }
+
+}
+
+/*
+ * Map efi memory ranges for runtime serivce and update new_memmap with virtual
+ * addresses.
+ */
+static void * __init efi_map_regions(int *count)
+{
+       efi_memory_desc_t *md;
+       void *p, *tmp, *new_memmap = NULL;
 
        for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
                md = p;
@@ -807,53 +957,95 @@ void __init efi_enter_virtual_mode(void)
                                continue;
                }
 
-               size = md->num_pages << EFI_PAGE_SHIFT;
-               end = md->phys_addr + size;
+               efi_map_region(md);
+               get_systab_virt_addr(md);
 
-               start_pfn = PFN_DOWN(md->phys_addr);
-               end_pfn = PFN_UP(end);
-               if (pfn_range_is_mapped(start_pfn, end_pfn)) {
-                       va = __va(md->phys_addr);
+               tmp = krealloc(new_memmap, (*count + 1) * memmap.desc_size,
+                              GFP_KERNEL);
+               if (!tmp)
+                       goto out;
+               new_memmap = tmp;
+               memcpy(new_memmap + (*count * memmap.desc_size), md,
+                      memmap.desc_size);
+               (*count)++;
+       }
 
-                       if (!(md->attribute & EFI_MEMORY_WB))
-                               efi_memory_uc((u64)(unsigned long)va, size);
-               } else
-                       va = efi_ioremap(md->phys_addr, size,
-                                        md->type, md->attribute);
+       return new_memmap;
+out:
+       kfree(new_memmap);
+       return NULL;
+}
+
+/*
+ * This function will switch the EFI runtime services to virtual mode.
+ * Essentially, we look through the EFI memmap and map every region that
+ * has the runtime attribute bit set in its memory descriptor into the
+ * ->trampoline_pgd page table using a top-down VA allocation scheme.
+ *
+ * The old method which used to update that memory descriptor with the
+ * virtual address obtained from ioremap() is still supported when the
+ * kernel is booted with efi=old_map on its command line. Same old
+ * method enabled the runtime services to be called without having to
+ * thunk back into physical mode for every invocation.
+ *
+ * The new method does a pagetable switch in a preemption-safe manner
+ * so that we're in a different address space when calling a runtime
+ * function. For function arguments passing we do copy the PGDs of the
+ * kernel page table into ->trampoline_pgd prior to each call.
+ *
+ * Specially for kexec boot, efi runtime maps in previous kernel should
+ * be passed in via setup_data. In that case runtime ranges will be mapped
+ * to the same virtual addresses as the first kernel.
+ */
+void __init efi_enter_virtual_mode(void)
+{
+       efi_status_t status;
+       void *new_memmap = NULL;
+       int err, count = 0;
 
-               md->virt_addr = (u64) (unsigned long) va;
+       efi.systab = NULL;
 
-               if (!va) {
-                       pr_err("ioremap of 0x%llX failed!\n",
-                              (unsigned long long)md->phys_addr);
-                       continue;
-               }
+       /*
+        * We don't do virtual mode, since we don't do runtime services, on
+        * non-native EFI
+        */
+       if (!efi_is_native()) {
+               efi_unmap_memmap();
+               return;
+       }
 
-               systab = (u64) (unsigned long) efi_phys.systab;
-               if (md->phys_addr <= systab && systab < end) {
-                       systab += md->virt_addr - md->phys_addr;
-                       efi.systab = (efi_system_table_t *) (unsigned long) systab;
+       if (efi_setup) {
+               efi_map_regions_fixed();
+       } else {
+               efi_merge_regions();
+               new_memmap = efi_map_regions(&count);
+               if (!new_memmap) {
+                       pr_err("Error reallocating memory, EFI runtime non-functional!\n");
+                       return;
                }
-               new_memmap = krealloc(new_memmap,
-                                     (count + 1) * memmap.desc_size,
-                                     GFP_KERNEL);
-               memcpy(new_memmap + (count * memmap.desc_size), md,
-                      memmap.desc_size);
-               count++;
        }
 
+       err = save_runtime_map();
+       if (err)
+               pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
+
        BUG_ON(!efi.systab);
 
-       status = phys_efi_set_virtual_address_map(
-               memmap.desc_size * count,
-               memmap.desc_size,
-               memmap.desc_version,
-               (efi_memory_desc_t *)__pa(new_memmap));
+       efi_setup_page_tables();
+       efi_sync_low_kernel_mappings();
 
-       if (status != EFI_SUCCESS) {
-               pr_alert("Unable to switch EFI into virtual mode "
-                        "(status=%lx)!\n", status);
-               panic("EFI call to SetVirtualAddressMap() failed!");
+       if (!efi_setup) {
+               status = phys_efi_set_virtual_address_map(
+                       memmap.desc_size * count,
+                       memmap.desc_size,
+                       memmap.desc_version,
+                       (efi_memory_desc_t *)__pa(new_memmap));
+
+               if (status != EFI_SUCCESS) {
+                       pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
+                                status);
+                       panic("EFI call to SetVirtualAddressMap() failed!");
+               }
        }
 
        /*
@@ -876,7 +1068,8 @@ void __init efi_enter_virtual_mode(void)
        efi.query_variable_info = virt_efi_query_variable_info;
        efi.update_capsule = virt_efi_update_capsule;
        efi.query_capsule_caps = virt_efi_query_capsule_caps;
-       if (__supported_pte_mask & _PAGE_NX)
+
+       if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
                runtime_code_page_mkexec();
 
        kfree(new_memmap);
@@ -1006,3 +1199,15 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
        return EFI_SUCCESS;
 }
 EXPORT_SYMBOL_GPL(efi_query_variable_store);
+
+static int __init parse_efi_cmdline(char *str)
+{
+       if (*str == '=')
+               str++;
+
+       if (!strncmp(str, "old_map", 7))
+               set_bit(EFI_OLD_MEMMAP, &x86_efi_facility);
+
+       return 0;
+}
+early_param("efi", parse_efi_cmdline);