x86/e820: Prepare e820 code for switch to dynamic storage
[cascardo/linux.git] / arch / x86 / platform / efi / efi.c
1 /*
2  * Common EFI (Extensible Firmware Interface) support functions
3  * Based on Extensible Firmware Interface Specification version 1.0
4  *
5  * Copyright (C) 1999 VA Linux Systems
6  * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
7  * Copyright (C) 1999-2002 Hewlett-Packard Co.
8  *      David Mosberger-Tang <davidm@hpl.hp.com>
9  *      Stephane Eranian <eranian@hpl.hp.com>
10  * Copyright (C) 2005-2008 Intel Co.
11  *      Fenghua Yu <fenghua.yu@intel.com>
12  *      Bibo Mao <bibo.mao@intel.com>
13  *      Chandramouli Narayanan <mouli@linux.intel.com>
14  *      Huang Ying <ying.huang@intel.com>
15  * Copyright (C) 2013 SuSE Labs
16  *      Borislav Petkov <bp@suse.de> - runtime services VA mapping
17  *
18  * Copied from efi_32.c to eliminate the duplicated code between EFI
19  * 32/64 support code. --ying 2007-10-26
20  *
21  * All EFI Runtime Services are not implemented yet as EFI only
22  * supports physical mode addressing on SoftSDV. This is to be fixed
23  * in a future version.  --drummond 1999-07-20
24  *
25  * Implemented EFI runtime services and virtual mode calls.  --davidm
26  *
27  * Goutham Rao: <goutham.rao@intel.com>
28  *      Skip non-WB memory and ignore empty memory ranges.
29  */
30
31 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
32
33 #include <linux/kernel.h>
34 #include <linux/init.h>
35 #include <linux/efi.h>
36 #include <linux/efi-bgrt.h>
37 #include <linux/export.h>
38 #include <linux/bootmem.h>
39 #include <linux/slab.h>
40 #include <linux/memblock.h>
41 #include <linux/spinlock.h>
42 #include <linux/uaccess.h>
43 #include <linux/time.h>
44 #include <linux/io.h>
45 #include <linux/reboot.h>
46 #include <linux/bcd.h>
47
48 #include <asm/setup.h>
49 #include <asm/efi.h>
50 #include <asm/time.h>
51 #include <asm/cacheflush.h>
52 #include <asm/tlbflush.h>
53 #include <asm/x86_init.h>
54 #include <asm/uv/uv.h>
55
56 static struct efi efi_phys __initdata;
57 static efi_system_table_t efi_systab __initdata;
58
59 static efi_config_table_type_t arch_tables[] __initdata = {
60 #ifdef CONFIG_X86_UV
61         {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab},
62 #endif
63         {NULL_GUID, NULL, NULL},
64 };
65
66 u64 efi_setup;          /* efi setup_data physical address */
67
68 static int add_efi_memmap __initdata;
69 static int __init setup_add_efi_memmap(char *arg)
70 {
71         add_efi_memmap = 1;
72         return 0;
73 }
74 early_param("add_efi_memmap", setup_add_efi_memmap);
75
76 static efi_status_t __init phys_efi_set_virtual_address_map(
77         unsigned long memory_map_size,
78         unsigned long descriptor_size,
79         u32 descriptor_version,
80         efi_memory_desc_t *virtual_map)
81 {
82         efi_status_t status;
83         unsigned long flags;
84         pgd_t *save_pgd;
85
86         save_pgd = efi_call_phys_prolog();
87
88         /* Disable interrupts around EFI calls: */
89         local_irq_save(flags);
90         status = efi_call_phys(efi_phys.set_virtual_address_map,
91                                memory_map_size, descriptor_size,
92                                descriptor_version, virtual_map);
93         local_irq_restore(flags);
94
95         efi_call_phys_epilog(save_pgd);
96
97         return status;
98 }
99
100 void __init efi_find_mirror(void)
101 {
102         efi_memory_desc_t *md;
103         u64 mirror_size = 0, total_size = 0;
104
105         for_each_efi_memory_desc(md) {
106                 unsigned long long start = md->phys_addr;
107                 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
108
109                 total_size += size;
110                 if (md->attribute & EFI_MEMORY_MORE_RELIABLE) {
111                         memblock_mark_mirror(start, size);
112                         mirror_size += size;
113                 }
114         }
115         if (mirror_size)
116                 pr_info("Memory: %lldM/%lldM mirrored memory\n",
117                         mirror_size>>20, total_size>>20);
118 }
119
120 /*
121  * Tell the kernel about the EFI memory map.  This might include
122  * more than the max 128 entries that can fit in the e820 legacy
123  * (zeropage) memory map.
124  */
125
126 static void __init do_add_efi_memmap(void)
127 {
128         efi_memory_desc_t *md;
129
130         for_each_efi_memory_desc(md) {
131                 unsigned long long start = md->phys_addr;
132                 unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
133                 int e820_type;
134
135                 switch (md->type) {
136                 case EFI_LOADER_CODE:
137                 case EFI_LOADER_DATA:
138                 case EFI_BOOT_SERVICES_CODE:
139                 case EFI_BOOT_SERVICES_DATA:
140                 case EFI_CONVENTIONAL_MEMORY:
141                         if (md->attribute & EFI_MEMORY_WB)
142                                 e820_type = E820_RAM;
143                         else
144                                 e820_type = E820_RESERVED;
145                         break;
146                 case EFI_ACPI_RECLAIM_MEMORY:
147                         e820_type = E820_ACPI;
148                         break;
149                 case EFI_ACPI_MEMORY_NVS:
150                         e820_type = E820_NVS;
151                         break;
152                 case EFI_UNUSABLE_MEMORY:
153                         e820_type = E820_UNUSABLE;
154                         break;
155                 case EFI_PERSISTENT_MEMORY:
156                         e820_type = E820_PMEM;
157                         break;
158                 default:
159                         /*
160                          * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE
161                          * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO
162                          * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE
163                          */
164                         e820_type = E820_RESERVED;
165                         break;
166                 }
167                 e820_add_region(start, size, e820_type);
168         }
169         sanitize_e820_map(e820->map, ARRAY_SIZE(e820->map), &e820->nr_map);
170 }
171
172 int __init efi_memblock_x86_reserve_range(void)
173 {
174         struct efi_info *e = &boot_params.efi_info;
175         phys_addr_t pmap;
176
177         if (efi_enabled(EFI_PARAVIRT))
178                 return 0;
179
180 #ifdef CONFIG_X86_32
181         /* Can't handle data above 4GB at this time */
182         if (e->efi_memmap_hi) {
183                 pr_err("Memory map is above 4GB, disabling EFI.\n");
184                 return -EINVAL;
185         }
186         pmap =  e->efi_memmap;
187 #else
188         pmap = (e->efi_memmap | ((__u64)e->efi_memmap_hi << 32));
189 #endif
190         efi.memmap.phys_map     = pmap;
191         efi.memmap.nr_map       = e->efi_memmap_size /
192                                   e->efi_memdesc_size;
193         efi.memmap.desc_size    = e->efi_memdesc_size;
194         efi.memmap.desc_version = e->efi_memdesc_version;
195
196         WARN(efi.memmap.desc_version != 1,
197              "Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
198              efi.memmap.desc_version);
199
200         memblock_reserve(pmap, efi.memmap.nr_map * efi.memmap.desc_size);
201
202         return 0;
203 }
204
205 void __init efi_print_memmap(void)
206 {
207         efi_memory_desc_t *md;
208         int i = 0;
209
210         for_each_efi_memory_desc(md) {
211                 char buf[64];
212
213                 pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
214                         i++, efi_md_typeattr_format(buf, sizeof(buf), md),
215                         md->phys_addr,
216                         md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
217                         (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
218         }
219 }
220
221 void __init efi_unmap_memmap(void)
222 {
223         unsigned long size;
224
225         clear_bit(EFI_MEMMAP, &efi.flags);
226
227         size = efi.memmap.nr_map * efi.memmap.desc_size;
228         if (efi.memmap.map) {
229                 early_memunmap(efi.memmap.map, size);
230                 efi.memmap.map = NULL;
231         }
232 }
233
234 static int __init efi_systab_init(void *phys)
235 {
236         if (efi_enabled(EFI_64BIT)) {
237                 efi_system_table_64_t *systab64;
238                 struct efi_setup_data *data = NULL;
239                 u64 tmp = 0;
240
241                 if (efi_setup) {
242                         data = early_memremap(efi_setup, sizeof(*data));
243                         if (!data)
244                                 return -ENOMEM;
245                 }
246                 systab64 = early_memremap((unsigned long)phys,
247                                          sizeof(*systab64));
248                 if (systab64 == NULL) {
249                         pr_err("Couldn't map the system table!\n");
250                         if (data)
251                                 early_memunmap(data, sizeof(*data));
252                         return -ENOMEM;
253                 }
254
255                 efi_systab.hdr = systab64->hdr;
256                 efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor :
257                                               systab64->fw_vendor;
258                 tmp |= data ? data->fw_vendor : systab64->fw_vendor;
259                 efi_systab.fw_revision = systab64->fw_revision;
260                 efi_systab.con_in_handle = systab64->con_in_handle;
261                 tmp |= systab64->con_in_handle;
262                 efi_systab.con_in = systab64->con_in;
263                 tmp |= systab64->con_in;
264                 efi_systab.con_out_handle = systab64->con_out_handle;
265                 tmp |= systab64->con_out_handle;
266                 efi_systab.con_out = systab64->con_out;
267                 tmp |= systab64->con_out;
268                 efi_systab.stderr_handle = systab64->stderr_handle;
269                 tmp |= systab64->stderr_handle;
270                 efi_systab.stderr = systab64->stderr;
271                 tmp |= systab64->stderr;
272                 efi_systab.runtime = data ?
273                                      (void *)(unsigned long)data->runtime :
274                                      (void *)(unsigned long)systab64->runtime;
275                 tmp |= data ? data->runtime : systab64->runtime;
276                 efi_systab.boottime = (void *)(unsigned long)systab64->boottime;
277                 tmp |= systab64->boottime;
278                 efi_systab.nr_tables = systab64->nr_tables;
279                 efi_systab.tables = data ? (unsigned long)data->tables :
280                                            systab64->tables;
281                 tmp |= data ? data->tables : systab64->tables;
282
283                 early_memunmap(systab64, sizeof(*systab64));
284                 if (data)
285                         early_memunmap(data, sizeof(*data));
286 #ifdef CONFIG_X86_32
287                 if (tmp >> 32) {
288                         pr_err("EFI data located above 4GB, disabling EFI.\n");
289                         return -EINVAL;
290                 }
291 #endif
292         } else {
293                 efi_system_table_32_t *systab32;
294
295                 systab32 = early_memremap((unsigned long)phys,
296                                          sizeof(*systab32));
297                 if (systab32 == NULL) {
298                         pr_err("Couldn't map the system table!\n");
299                         return -ENOMEM;
300                 }
301
302                 efi_systab.hdr = systab32->hdr;
303                 efi_systab.fw_vendor = systab32->fw_vendor;
304                 efi_systab.fw_revision = systab32->fw_revision;
305                 efi_systab.con_in_handle = systab32->con_in_handle;
306                 efi_systab.con_in = systab32->con_in;
307                 efi_systab.con_out_handle = systab32->con_out_handle;
308                 efi_systab.con_out = systab32->con_out;
309                 efi_systab.stderr_handle = systab32->stderr_handle;
310                 efi_systab.stderr = systab32->stderr;
311                 efi_systab.runtime = (void *)(unsigned long)systab32->runtime;
312                 efi_systab.boottime = (void *)(unsigned long)systab32->boottime;
313                 efi_systab.nr_tables = systab32->nr_tables;
314                 efi_systab.tables = systab32->tables;
315
316                 early_memunmap(systab32, sizeof(*systab32));
317         }
318
319         efi.systab = &efi_systab;
320
321         /*
322          * Verify the EFI Table
323          */
324         if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) {
325                 pr_err("System table signature incorrect!\n");
326                 return -EINVAL;
327         }
328         if ((efi.systab->hdr.revision >> 16) == 0)
329                 pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n",
330                        efi.systab->hdr.revision >> 16,
331                        efi.systab->hdr.revision & 0xffff);
332
333         return 0;
334 }
335
336 static int __init efi_runtime_init32(void)
337 {
338         efi_runtime_services_32_t *runtime;
339
340         runtime = early_memremap((unsigned long)efi.systab->runtime,
341                         sizeof(efi_runtime_services_32_t));
342         if (!runtime) {
343                 pr_err("Could not map the runtime service table!\n");
344                 return -ENOMEM;
345         }
346
347         /*
348          * We will only need *early* access to the SetVirtualAddressMap
349          * EFI runtime service. All other runtime services will be called
350          * via the virtual mapping.
351          */
352         efi_phys.set_virtual_address_map =
353                         (efi_set_virtual_address_map_t *)
354                         (unsigned long)runtime->set_virtual_address_map;
355         early_memunmap(runtime, sizeof(efi_runtime_services_32_t));
356
357         return 0;
358 }
359
360 static int __init efi_runtime_init64(void)
361 {
362         efi_runtime_services_64_t *runtime;
363
364         runtime = early_memremap((unsigned long)efi.systab->runtime,
365                         sizeof(efi_runtime_services_64_t));
366         if (!runtime) {
367                 pr_err("Could not map the runtime service table!\n");
368                 return -ENOMEM;
369         }
370
371         /*
372          * We will only need *early* access to the SetVirtualAddressMap
373          * EFI runtime service. All other runtime services will be called
374          * via the virtual mapping.
375          */
376         efi_phys.set_virtual_address_map =
377                         (efi_set_virtual_address_map_t *)
378                         (unsigned long)runtime->set_virtual_address_map;
379         early_memunmap(runtime, sizeof(efi_runtime_services_64_t));
380
381         return 0;
382 }
383
384 static int __init efi_runtime_init(void)
385 {
386         int rv;
387
388         /*
389          * Check out the runtime services table. We need to map
390          * the runtime services table so that we can grab the physical
391          * address of several of the EFI runtime functions, needed to
392          * set the firmware into virtual mode.
393          *
394          * When EFI_PARAVIRT is in force then we could not map runtime
395          * service memory region because we do not have direct access to it.
396          * However, runtime services are available through proxy functions
397          * (e.g. in case of Xen dom0 EFI implementation they call special
398          * hypercall which executes relevant EFI functions) and that is why
399          * they are always enabled.
400          */
401
402         if (!efi_enabled(EFI_PARAVIRT)) {
403                 if (efi_enabled(EFI_64BIT))
404                         rv = efi_runtime_init64();
405                 else
406                         rv = efi_runtime_init32();
407
408                 if (rv)
409                         return rv;
410         }
411
412         set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
413
414         return 0;
415 }
416
417 static int __init efi_memmap_init(void)
418 {
419         unsigned long addr, size;
420
421         if (efi_enabled(EFI_PARAVIRT))
422                 return 0;
423
424         /* Map the EFI memory map */
425         size = efi.memmap.nr_map * efi.memmap.desc_size;
426         addr = (unsigned long)efi.memmap.phys_map;
427
428         efi.memmap.map = early_memremap(addr, size);
429         if (efi.memmap.map == NULL) {
430                 pr_err("Could not map the memory map!\n");
431                 return -ENOMEM;
432         }
433
434         efi.memmap.map_end = efi.memmap.map + size;
435
436         if (add_efi_memmap)
437                 do_add_efi_memmap();
438
439         set_bit(EFI_MEMMAP, &efi.flags);
440
441         return 0;
442 }
443
444 void __init efi_init(void)
445 {
446         efi_char16_t *c16;
447         char vendor[100] = "unknown";
448         int i = 0;
449         void *tmp;
450
451 #ifdef CONFIG_X86_32
452         if (boot_params.efi_info.efi_systab_hi ||
453             boot_params.efi_info.efi_memmap_hi) {
454                 pr_info("Table located above 4GB, disabling EFI.\n");
455                 return;
456         }
457         efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
458 #else
459         efi_phys.systab = (efi_system_table_t *)
460                           (boot_params.efi_info.efi_systab |
461                           ((__u64)boot_params.efi_info.efi_systab_hi<<32));
462 #endif
463
464         if (efi_systab_init(efi_phys.systab))
465                 return;
466
467         efi.config_table = (unsigned long)efi.systab->tables;
468         efi.fw_vendor    = (unsigned long)efi.systab->fw_vendor;
469         efi.runtime      = (unsigned long)efi.systab->runtime;
470
471         /*
472          * Show what we know for posterity
473          */
474         c16 = tmp = early_memremap(efi.systab->fw_vendor, 2);
475         if (c16) {
476                 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
477                         vendor[i] = *c16++;
478                 vendor[i] = '\0';
479         } else
480                 pr_err("Could not map the firmware vendor!\n");
481         early_memunmap(tmp, 2);
482
483         pr_info("EFI v%u.%.02u by %s\n",
484                 efi.systab->hdr.revision >> 16,
485                 efi.systab->hdr.revision & 0xffff, vendor);
486
487         if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables))
488                 return;
489
490         if (efi_config_init(arch_tables))
491                 return;
492
493         /*
494          * Note: We currently don't support runtime services on an EFI
495          * that doesn't match the kernel 32/64-bit mode.
496          */
497
498         if (!efi_runtime_supported())
499                 pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
500         else {
501                 if (efi_runtime_disabled() || efi_runtime_init())
502                         return;
503         }
504         if (efi_memmap_init())
505                 return;
506
507         if (efi_enabled(EFI_DBG))
508                 efi_print_memmap();
509
510         efi_esrt_init();
511 }
512
513 void __init efi_late_init(void)
514 {
515         efi_bgrt_init();
516 }
517
518 void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
519 {
520         u64 addr, npages;
521
522         addr = md->virt_addr;
523         npages = md->num_pages;
524
525         memrange_efi_to_native(&addr, &npages);
526
527         if (executable)
528                 set_memory_x(addr, npages);
529         else
530                 set_memory_nx(addr, npages);
531 }
532
533 void __init runtime_code_page_mkexec(void)
534 {
535         efi_memory_desc_t *md;
536
537         /* Make EFI runtime service code area executable */
538         for_each_efi_memory_desc(md) {
539                 if (md->type != EFI_RUNTIME_SERVICES_CODE)
540                         continue;
541
542                 efi_set_executable(md, true);
543         }
544 }
545
546 void __init efi_memory_uc(u64 addr, unsigned long size)
547 {
548         unsigned long page_shift = 1UL << EFI_PAGE_SHIFT;
549         u64 npages;
550
551         npages = round_up(size, page_shift) / page_shift;
552         memrange_efi_to_native(&addr, &npages);
553         set_memory_uc(addr, npages);
554 }
555
556 void __init old_map_region(efi_memory_desc_t *md)
557 {
558         u64 start_pfn, end_pfn, end;
559         unsigned long size;
560         void *va;
561
562         start_pfn = PFN_DOWN(md->phys_addr);
563         size      = md->num_pages << PAGE_SHIFT;
564         end       = md->phys_addr + size;
565         end_pfn   = PFN_UP(end);
566
567         if (pfn_range_is_mapped(start_pfn, end_pfn)) {
568                 va = __va(md->phys_addr);
569
570                 if (!(md->attribute & EFI_MEMORY_WB))
571                         efi_memory_uc((u64)(unsigned long)va, size);
572         } else
573                 va = efi_ioremap(md->phys_addr, size,
574                                  md->type, md->attribute);
575
576         md->virt_addr = (u64) (unsigned long) va;
577         if (!va)
578                 pr_err("ioremap of 0x%llX failed!\n",
579                        (unsigned long long)md->phys_addr);
580 }
581
582 /* Merge contiguous regions of the same type and attribute */
583 static void __init efi_merge_regions(void)
584 {
585         efi_memory_desc_t *md, *prev_md = NULL;
586
587         for_each_efi_memory_desc(md) {
588                 u64 prev_size;
589
590                 if (!prev_md) {
591                         prev_md = md;
592                         continue;
593                 }
594
595                 if (prev_md->type != md->type ||
596                     prev_md->attribute != md->attribute) {
597                         prev_md = md;
598                         continue;
599                 }
600
601                 prev_size = prev_md->num_pages << EFI_PAGE_SHIFT;
602
603                 if (md->phys_addr == (prev_md->phys_addr + prev_size)) {
604                         prev_md->num_pages += md->num_pages;
605                         md->type = EFI_RESERVED_TYPE;
606                         md->attribute = 0;
607                         continue;
608                 }
609                 prev_md = md;
610         }
611 }
612
613 static void __init get_systab_virt_addr(efi_memory_desc_t *md)
614 {
615         unsigned long size;
616         u64 end, systab;
617
618         size = md->num_pages << EFI_PAGE_SHIFT;
619         end = md->phys_addr + size;
620         systab = (u64)(unsigned long)efi_phys.systab;
621         if (md->phys_addr <= systab && systab < end) {
622                 systab += md->virt_addr - md->phys_addr;
623                 efi.systab = (efi_system_table_t *)(unsigned long)systab;
624         }
625 }
626
627 static void __init save_runtime_map(void)
628 {
629 #ifdef CONFIG_KEXEC_CORE
630         unsigned long desc_size;
631         efi_memory_desc_t *md;
632         void *tmp, *q = NULL;
633         int count = 0;
634
635         if (efi_enabled(EFI_OLD_MEMMAP))
636                 return;
637
638         desc_size = efi.memmap.desc_size;
639
640         for_each_efi_memory_desc(md) {
641                 if (!(md->attribute & EFI_MEMORY_RUNTIME) ||
642                     (md->type == EFI_BOOT_SERVICES_CODE) ||
643                     (md->type == EFI_BOOT_SERVICES_DATA))
644                         continue;
645                 tmp = krealloc(q, (count + 1) * desc_size, GFP_KERNEL);
646                 if (!tmp)
647                         goto out;
648                 q = tmp;
649
650                 memcpy(q + count * desc_size, md, desc_size);
651                 count++;
652         }
653
654         efi_runtime_map_setup(q, count, desc_size);
655         return;
656
657 out:
658         kfree(q);
659         pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n");
660 #endif
661 }
662
663 static void *realloc_pages(void *old_memmap, int old_shift)
664 {
665         void *ret;
666
667         ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1);
668         if (!ret)
669                 goto out;
670
671         /*
672          * A first-time allocation doesn't have anything to copy.
673          */
674         if (!old_memmap)
675                 return ret;
676
677         memcpy(ret, old_memmap, PAGE_SIZE << old_shift);
678
679 out:
680         free_pages((unsigned long)old_memmap, old_shift);
681         return ret;
682 }
683
684 /*
685  * Iterate the EFI memory map in reverse order because the regions
686  * will be mapped top-down. The end result is the same as if we had
687  * mapped things forward, but doesn't require us to change the
688  * existing implementation of efi_map_region().
689  */
690 static inline void *efi_map_next_entry_reverse(void *entry)
691 {
692         /* Initial call */
693         if (!entry)
694                 return efi.memmap.map_end - efi.memmap.desc_size;
695
696         entry -= efi.memmap.desc_size;
697         if (entry < efi.memmap.map)
698                 return NULL;
699
700         return entry;
701 }
702
703 /*
704  * efi_map_next_entry - Return the next EFI memory map descriptor
705  * @entry: Previous EFI memory map descriptor
706  *
707  * This is a helper function to iterate over the EFI memory map, which
708  * we do in different orders depending on the current configuration.
709  *
710  * To begin traversing the memory map @entry must be %NULL.
711  *
712  * Returns %NULL when we reach the end of the memory map.
713  */
714 static void *efi_map_next_entry(void *entry)
715 {
716         if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
717                 /*
718                  * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
719                  * config table feature requires us to map all entries
720                  * in the same order as they appear in the EFI memory
721                  * map. That is to say, entry N must have a lower
722                  * virtual address than entry N+1. This is because the
723                  * firmware toolchain leaves relative references in
724                  * the code/data sections, which are split and become
725                  * separate EFI memory regions. Mapping things
726                  * out-of-order leads to the firmware accessing
727                  * unmapped addresses.
728                  *
729                  * Since we need to map things this way whether or not
730                  * the kernel actually makes use of
731                  * EFI_PROPERTIES_TABLE, let's just switch to this
732                  * scheme by default for 64-bit.
733                  */
734                 return efi_map_next_entry_reverse(entry);
735         }
736
737         /* Initial call */
738         if (!entry)
739                 return efi.memmap.map;
740
741         entry += efi.memmap.desc_size;
742         if (entry >= efi.memmap.map_end)
743                 return NULL;
744
745         return entry;
746 }
747
748 /*
749  * Map the efi memory ranges of the runtime services and update new_mmap with
750  * virtual addresses.
751  */
752 static void * __init efi_map_regions(int *count, int *pg_shift)
753 {
754         void *p, *new_memmap = NULL;
755         unsigned long left = 0;
756         unsigned long desc_size;
757         efi_memory_desc_t *md;
758
759         desc_size = efi.memmap.desc_size;
760
761         p = NULL;
762         while ((p = efi_map_next_entry(p))) {
763                 md = p;
764                 if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
765 #ifdef CONFIG_X86_64
766                         if (md->type != EFI_BOOT_SERVICES_CODE &&
767                             md->type != EFI_BOOT_SERVICES_DATA)
768 #endif
769                                 continue;
770                 }
771
772                 efi_map_region(md);
773                 get_systab_virt_addr(md);
774
775                 if (left < desc_size) {
776                         new_memmap = realloc_pages(new_memmap, *pg_shift);
777                         if (!new_memmap)
778                                 return NULL;
779
780                         left += PAGE_SIZE << *pg_shift;
781                         (*pg_shift)++;
782                 }
783
784                 memcpy(new_memmap + (*count * desc_size), md, desc_size);
785
786                 left -= desc_size;
787                 (*count)++;
788         }
789
790         return new_memmap;
791 }
792
793 static void __init kexec_enter_virtual_mode(void)
794 {
795 #ifdef CONFIG_KEXEC_CORE
796         efi_memory_desc_t *md;
797         unsigned int num_pages;
798
799         efi.systab = NULL;
800
801         /*
802          * We don't do virtual mode, since we don't do runtime services, on
803          * non-native EFI
804          */
805         if (!efi_is_native()) {
806                 efi_unmap_memmap();
807                 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
808                 return;
809         }
810
811         if (efi_alloc_page_tables()) {
812                 pr_err("Failed to allocate EFI page tables\n");
813                 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
814                 return;
815         }
816
817         /*
818         * Map efi regions which were passed via setup_data. The virt_addr is a
819         * fixed addr which was used in first kernel of a kexec boot.
820         */
821         for_each_efi_memory_desc(md) {
822                 efi_map_region_fixed(md); /* FIXME: add error handling */
823                 get_systab_virt_addr(md);
824         }
825
826         save_runtime_map();
827
828         BUG_ON(!efi.systab);
829
830         num_pages = ALIGN(efi.memmap.nr_map * efi.memmap.desc_size, PAGE_SIZE);
831         num_pages >>= PAGE_SHIFT;
832
833         if (efi_setup_page_tables(efi.memmap.phys_map, num_pages)) {
834                 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
835                 return;
836         }
837
838         efi_sync_low_kernel_mappings();
839
840         /*
841          * Now that EFI is in virtual mode, update the function
842          * pointers in the runtime service table to the new virtual addresses.
843          *
844          * Call EFI services through wrapper functions.
845          */
846         efi.runtime_version = efi_systab.hdr.revision;
847
848         efi_native_runtime_setup();
849
850         efi.set_virtual_address_map = NULL;
851
852         if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX))
853                 runtime_code_page_mkexec();
854
855         /* clean DUMMY object */
856         efi_delete_dummy_variable();
857 #endif
858 }
859
860 /*
861  * This function will switch the EFI runtime services to virtual mode.
862  * Essentially, we look through the EFI memmap and map every region that
863  * has the runtime attribute bit set in its memory descriptor into the
864  * efi_pgd page table.
865  *
866  * The old method which used to update that memory descriptor with the
867  * virtual address obtained from ioremap() is still supported when the
868  * kernel is booted with efi=old_map on its command line. Same old
869  * method enabled the runtime services to be called without having to
870  * thunk back into physical mode for every invocation.
871  *
872  * The new method does a pagetable switch in a preemption-safe manner
873  * so that we're in a different address space when calling a runtime
874  * function. For function arguments passing we do copy the PUDs of the
875  * kernel page table into efi_pgd prior to each call.
876  *
877  * Specially for kexec boot, efi runtime maps in previous kernel should
878  * be passed in via setup_data. In that case runtime ranges will be mapped
879  * to the same virtual addresses as the first kernel, see
880  * kexec_enter_virtual_mode().
881  */
882 static void __init __efi_enter_virtual_mode(void)
883 {
884         int count = 0, pg_shift = 0;
885         void *new_memmap = NULL;
886         efi_status_t status;
887
888         efi.systab = NULL;
889
890         if (efi_alloc_page_tables()) {
891                 pr_err("Failed to allocate EFI page tables\n");
892                 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
893                 return;
894         }
895
896         efi_merge_regions();
897         new_memmap = efi_map_regions(&count, &pg_shift);
898         if (!new_memmap) {
899                 pr_err("Error reallocating memory, EFI runtime non-functional!\n");
900                 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
901                 return;
902         }
903
904         save_runtime_map();
905
906         BUG_ON(!efi.systab);
907
908         if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) {
909                 clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
910                 return;
911         }
912
913         efi_sync_low_kernel_mappings();
914
915         if (efi_is_native()) {
916                 status = phys_efi_set_virtual_address_map(
917                                 efi.memmap.desc_size * count,
918                                 efi.memmap.desc_size,
919                                 efi.memmap.desc_version,
920                                 (efi_memory_desc_t *)__pa(new_memmap));
921         } else {
922                 status = efi_thunk_set_virtual_address_map(
923                                 efi_phys.set_virtual_address_map,
924                                 efi.memmap.desc_size * count,
925                                 efi.memmap.desc_size,
926                                 efi.memmap.desc_version,
927                                 (efi_memory_desc_t *)__pa(new_memmap));
928         }
929
930         if (status != EFI_SUCCESS) {
931                 pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n",
932                          status);
933                 panic("EFI call to SetVirtualAddressMap() failed!");
934         }
935
936         /*
937          * Now that EFI is in virtual mode, update the function
938          * pointers in the runtime service table to the new virtual addresses.
939          *
940          * Call EFI services through wrapper functions.
941          */
942         efi.runtime_version = efi_systab.hdr.revision;
943
944         if (efi_is_native())
945                 efi_native_runtime_setup();
946         else
947                 efi_thunk_runtime_setup();
948
949         efi.set_virtual_address_map = NULL;
950
951         /*
952          * Apply more restrictive page table mapping attributes now that
953          * SVAM() has been called and the firmware has performed all
954          * necessary relocation fixups for the new virtual addresses.
955          */
956         efi_runtime_update_mappings();
957         efi_dump_pagetable();
958
959         /*
960          * We mapped the descriptor array into the EFI pagetable above
961          * but we're not unmapping it here because if we're running in
962          * EFI mixed mode we need all of memory to be accessible when
963          * we pass parameters to the EFI runtime services in the
964          * thunking code.
965          */
966         free_pages((unsigned long)new_memmap, pg_shift);
967
968         /* clean DUMMY object */
969         efi_delete_dummy_variable();
970 }
971
972 void __init efi_enter_virtual_mode(void)
973 {
974         if (efi_enabled(EFI_PARAVIRT))
975                 return;
976
977         if (efi_setup)
978                 kexec_enter_virtual_mode();
979         else
980                 __efi_enter_virtual_mode();
981 }
982
983 /*
984  * Convenience functions to obtain memory types and attributes
985  */
986 u32 efi_mem_type(unsigned long phys_addr)
987 {
988         efi_memory_desc_t *md;
989
990         if (!efi_enabled(EFI_MEMMAP))
991                 return 0;
992
993         for_each_efi_memory_desc(md) {
994                 if ((md->phys_addr <= phys_addr) &&
995                     (phys_addr < (md->phys_addr +
996                                   (md->num_pages << EFI_PAGE_SHIFT))))
997                         return md->type;
998         }
999         return 0;
1000 }
1001
1002 static int __init arch_parse_efi_cmdline(char *str)
1003 {
1004         if (!str) {
1005                 pr_warn("need at least one option\n");
1006                 return -EINVAL;
1007         }
1008
1009         if (parse_option_str(str, "old_map"))
1010                 set_bit(EFI_OLD_MEMMAP, &efi.flags);
1011
1012         return 0;
1013 }
1014 early_param("efi", arch_parse_efi_cmdline);