x86_64/vdso: Move getcpu code from vsyscall_64.c to vdso/vma.c
arch/x86/vdso/vma.c
/*
 * Set up the VMAs to tell the VM about the vDSO.
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;

extern unsigned short vdso_sync_cpuid;
#endif

void __init init_vdso_image(const struct vdso_image *image)
{
        int i;
        int npages = (image->size) / PAGE_SIZE;

        BUG_ON(image->size % PAGE_SIZE != 0);
        for (i = 0; i < npages; i++)
                image->text_mapping.pages[i] =
                        virt_to_page(image->data + i*PAGE_SIZE);

        apply_alternatives((struct alt_instr *)(image->data + image->alt),
                           (struct alt_instr *)(image->data + image->alt +
                                                image->alt_len));
}

#if defined(CONFIG_X86_64)
static int __init init_vdso(void)
{
        init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
        init_vdso_image(&vdso_image_x32);
#endif

        return 0;
}
subsys_initcall(init_vdso);
#endif

struct linux_binprm;

/* Put the vdso above the (randomized) stack with another randomized offset.
   This way there is no hole in the middle of address space.
   To save memory make sure it is still in the same PTE as the stack top.
   This doesn't give that many random bits.

   Only used for the 64-bit and x32 vdsos. */
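/* For illustration (assuming 4 KiB pages and PTRS_PER_PTE == 512): the random
   offset computed below stays inside a single 2 MiB, PMD-sized window above
   the stack top, i.e. roughly 9 bits of entropy. */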
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
#ifdef CONFIG_X86_32
        return 0;
#else
        unsigned long addr, end;
        unsigned offset;
        end = (start + PMD_SIZE - 1) & PMD_MASK;
        if (end >= TASK_SIZE_MAX)
                end = TASK_SIZE_MAX;
        end -= len;
        /* This loses some more bits than a modulo, but is cheaper */
        offset = get_random_int() & (PTRS_PER_PTE - 1);
        addr = start + (offset << PAGE_SHIFT);
        if (addr >= end)
                addr = end;

        /*
         * page-align it here so that get_unmapped_area doesn't
         * align it wrongfully again to the next page. addr can come in 4K
         * unaligned here as a result of stack start randomization.
         */
        addr = PAGE_ALIGN(addr);
        addr = align_vdso_addr(addr);

        return addr;
#endif
}

static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long addr, text_start;
        int ret = 0;
        static struct page *no_pages[] = {NULL};
        static struct vm_special_mapping vvar_mapping = {
                .name = "[vvar]",
                .pages = no_pages,
        };
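        /*
         * Note: the "[vvar]" mapping above intentionally has no backing
         * pages of its own; the actual vvar page (and, if configured, the
         * HPET page) is installed into it further down via remap_pfn_range()
         * and io_remap_pfn_range().
         */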

        if (calculate_addr) {
                addr = vdso_addr(current->mm->start_stack,
                                 image->size - image->sym_vvar_start);
        } else {
                addr = 0;
        }

        down_write(&mm->mmap_sem);

        addr = get_unmapped_area(NULL, addr,
                                 image->size - image->sym_vvar_start, 0, 0);
        if (IS_ERR_VALUE(addr)) {
                ret = addr;
                goto up_fail;
        }

        text_start = addr - image->sym_vvar_start;
        current->mm->context.vdso = (void __user *)text_start;

        /*
         * MAYWRITE to allow gdb to COW and set breakpoints
         */
        vma = _install_special_mapping(mm,
                                       text_start,
                                       image->size,
                                       VM_READ|VM_EXEC|
                                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
                                       &image->text_mapping);

        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto up_fail;
        }

        vma = _install_special_mapping(mm,
                                       addr,
                                       -image->sym_vvar_start,
                                       VM_READ|VM_MAYREAD,
                                       &vvar_mapping);

        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto up_fail;
        }

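        /* Expose the kernel's __vvar_page read-only where the vDSO expects it. */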
        if (image->sym_vvar_page)
                ret = remap_pfn_range(vma,
                                      text_start + image->sym_vvar_page,
                                      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
                                      PAGE_SIZE,
                                      PAGE_READONLY);

        if (ret)
                goto up_fail;

#ifdef CONFIG_HPET_TIMER
        if (hpet_address && image->sym_hpet_page) {
                ret = io_remap_pfn_range(vma,
                        text_start + image->sym_hpet_page,
                        hpet_address >> PAGE_SHIFT,
                        PAGE_SIZE,
                        pgprot_noncached(PAGE_READONLY));

                if (ret)
                        goto up_fail;
        }
#endif

up_fail:
        if (ret)
                current->mm->context.vdso = NULL;

        up_write(&mm->mmap_sem);
        return ret;
}

#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
static int load_vdso32(void)
{
        int ret;

        if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
                return 0;

        ret = map_vdso(selected_vdso32, false);
        if (ret)
                return ret;

        if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
                current_thread_info()->sysenter_return =
                        current->mm->context.vdso +
                        selected_vdso32->sym_VDSO32_SYSENTER_RETURN;

        return 0;
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        if (!vdso64_enabled)
                return 0;

        return map_vdso(&vdso_image_64, true);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
                                       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
        if (test_thread_flag(TIF_X32)) {
                if (!vdso64_enabled)
                        return 0;

                return map_vdso(&vdso_image_x32, true);
        }
#endif

        return load_vdso32();
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        return load_vdso32();
}
#endif

#ifdef CONFIG_X86_64
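/*
 * "vdso=" boot parameter for the 64-bit vDSO: vdso=0 disables the mapping,
 * any other value (the default is 1) leaves it enabled.
 */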
static __init int vdso_setup(char *s)
{
        vdso64_enabled = simple_strtoul(s, NULL, 0);
        return 0;
}
__setup("vdso=", vdso_setup);
#endif

#ifdef CONFIG_X86_64
/*
 * Assume __initcall executes before all user space. Hopefully kmod
 * doesn't violate that. We'll find out if it does.
 */
static void vsyscall_set_cpu(int cpu)
{
        unsigned long d;
        unsigned long node = 0;
#ifdef CONFIG_NUMA
        node = cpu_to_node(cpu);
#endif
        if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
                write_rdtscp_aux((node << 12) | cpu);

        /*
         * Store cpu number in limit so that it can be loaded quickly
         * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
         */
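        /*
         * The encoded value lands in the segment limit of the per-CPU GDT
         * entry: limit bits 15:0 sit in descriptor bits 15:0 and limit bits
         * 19:16 in descriptor bits 51:48, which is why the node number is
         * split below.  Userspace reads it back with LSL and decodes it as
         * roughly cpu = limit & 0xfff, node = limit >> 12.
         */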
        d = 0x0f40000000000ULL;
        d |= cpu;
        d |= (node & 0xf) << 12;
        d |= (node >> 4) << 48;

        write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}

static void cpu_vsyscall_init(void *arg)
{
        /* preemption should be already off */
        vsyscall_set_cpu(raw_smp_processor_id());
}

static int
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
        long cpu = (long)arg;

        if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
                smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);

        return NOTIFY_DONE;
}

static int __init vsyscall_init(void)
{
        cpu_notifier_register_begin();

        on_each_cpu(cpu_vsyscall_init, NULL, 1);
        /* notifier priority > KVM */
        __hotcpu_notifier(cpu_vsyscall_notifier, 30);

        cpu_notifier_register_done();

        return 0;
}
__initcall(vsyscall_init);
#endif