/*
 * Set up the VMAs to tell the VM about the vDSO.
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 */

#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;

extern unsigned short vdso_sync_cpuid;
#endif

void __init init_vdso_image(const struct vdso_image *image)
{
	int i;
	int npages = (image->size) / PAGE_SIZE;

	BUG_ON(image->size % PAGE_SIZE != 0);
	for (i = 0; i < npages; i++)
		image->text_mapping.pages[i] =
			virt_to_page(image->data + i*PAGE_SIZE);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

#if defined(CONFIG_X86_64)
static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);
#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif
	return 0;
}
subsys_initcall(init_vdso);
#endif

/*
 * Put the vdso above the (randomized) stack with another randomized
 * offset.  This way there is no hole in the middle of address space.
 * To save memory make sure it is still in the same PTE as the stack top.
 * This doesn't give that many random bits.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
	unsigned long addr, end;
	unsigned offset;

	end = (start + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	/* This loses some more bits than a modulo, but is cheaper */
	offset = get_random_int() & (PTRS_PER_PTE - 1);
	addr = start + (offset << PAGE_SHIFT);
	if (addr >= end)
		addr = end;
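	/*
	 * Example (assuming the x86-64 defaults: 4K pages, PAGE_SHIFT == 12,
	 * PTRS_PER_PTE == 512): offset is a value in [0, 511], so the vdso
	 * is placed at most 511 * 4K (just under 2MB) above the stack top,
	 * and the clamp just above keeps addr at or below 'end', the
	 * PMD_SIZE (2MB) boundary computed from the stack top.
	 */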
	/*
	 * page-align it here so that get_unmapped_area doesn't
	 * align it wrongfully again to the next page. addr can come in 4K
	 * unaligned here as a result of stack start randomization.
	 */
	addr = PAGE_ALIGN(addr);
	addr = align_vdso_addr(addr);

	return addr;
}

static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr, text_start;
	int ret = 0;
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};

	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack,
				 image->size - image->sym_vvar_start);
	} else {
		addr = 0;
	}

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;
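	/*
	 * Resulting layout (sketch; assumes sym_vvar_start is the negative
	 * offset of the vvar area relative to the vdso text, which is what
	 * the arithmetic above implies):
	 *
	 *   addr                            text_start
	 *   |<--- -sym_vvar_start bytes --->|<--- image->size bytes --->|
	 *   [ vvar page, hpet page, ...     ][ vdso text                ]
	 *
	 * so image->size - image->sym_vvar_start was the total span asked
	 * of get_unmapped_area() above.
	 */
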
	/* MAYWRITE to allow gdb to COW and set breakpoints */
	vma = _install_special_mapping(mm, text_start, image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm, addr, -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD, &vvar_mapping);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      text_start + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE, PAGE_READONLY);
	if (ret)
		goto up_fail;

#ifdef CONFIG_HPET_TIMER
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
			text_start + image->sym_hpet_page,
			hpet_address >> PAGE_SHIFT,
			PAGE_SIZE,
			pgprot_noncached(PAGE_READONLY));
		if (ret)
			goto up_fail;
	}
#endif

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}

#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
static int load_vdso32(void)
{
	int ret;

	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	ret = map_vdso(selected_vdso32, false);
	if (ret)
		return ret;

	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
		current_thread_info()->sysenter_return =
			current->mm->context.vdso +
			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
	return 0;
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;
	return map_vdso(&vdso_image_64, true);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32))
		return map_vdso(&vdso_image_x32, true);
#endif
	return load_vdso32();
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 0;
}
__setup("vdso=", vdso_setup);
#endif

/*
 * Assume __initcall executes before all user space. Hopefully kmod
 * doesn't violate that. We'll find out if it does.
 */
static void vsyscall_set_cpu(int cpu)
{
	unsigned long d;
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);
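	/*
	 * With RDTSCP available, user space gets this value back directly:
	 * RDTSCP returns TSC_AUX in ECX, so a reader can unpack it the same
	 * way it was packed above (illustrative only):
	 *
	 *	cpu  = ecx & 0xfff;
	 *	node = ecx >> 12;
	 */
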
	/*
	 * Store cpu number in limit so that it can be loaded quickly
	 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
	 */
	d = 0x0f40000000000ULL;
	d |= cpu;
	d |= (node & 0xf) << 12;
	d |= (node >> 4) << 48;
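	/*
	 * The ORs above land the packed value in the descriptor's limit
	 * field (bits 0-15 of d hold limit[15:0], bits 48-51 hold
	 * limit[19:16]), so the segment limit reads back as
	 * (node << 12) | cpu.  A vgetcpu-style reader can fetch it with an
	 * unprivileged LSL on the GDT_ENTRY_PER_CPU selector, roughly
	 * (sketch, not taken verbatim from the vdso sources):
	 *
	 *	unsigned int p;
	 *	asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
	 *	cpu  = p & 0xfff;
	 *	node = p >> 12;
	 */
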
	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}

static void cpu_vsyscall_init(void *arg)
{
	/* preemption should be already off */
	vsyscall_set_cpu(raw_smp_processor_id());
}

static int
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
	long cpu = (long)arg;

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);

	return NOTIFY_DONE;
}

static int __init vsyscall_init(void)
{
	cpu_notifier_register_begin();

	on_each_cpu(cpu_vsyscall_init, NULL, 1);
	/* notifier priority > KVM */
	__hotcpu_notifier(cpu_vsyscall_notifier, 30);

	cpu_notifier_register_done();

	return 0;
}
__initcall(vsyscall_init);