drivers/xen/xenfs/privcmd.c

/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */

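/*
 * Overview: this file implements the privcmd device exposed through xenfs
 * (conventionally mounted at /proc/xen, so the node appears as
 * /proc/xen/privcmd).  It offers three ioctls to the dom0 toolstack:
 * IOCTL_PRIVCMD_HYPERCALL issues a raw hypercall, while IOCTL_PRIVCMD_MMAP
 * and IOCTL_PRIVCMD_MMAPBATCH map machine frames belonging to another
 * domain into a local VMA.
 */
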
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>

#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/privcmd.h>
#include <xen/interface/xen.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/xen-ops.h>

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif

static long privcmd_ioctl_hypercall(void __user *udata)
{
        struct privcmd_hypercall hypercall;
        long ret;

        if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
                return -EFAULT;

        ret = privcmd_call(hypercall.op,
                           hypercall.arg[0], hypercall.arg[1],
                           hypercall.arg[2], hypercall.arg[3],
                           hypercall.arg[4]);

        return ret;
}
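
/*
 * Illustrative userspace call sequence (a sketch, not part of this file;
 * it assumes the privcmd node is visible at /proc/xen/privcmd and that the
 * privcmd ABI header is available to userspace):
 *
 *	#include <fcntl.h>
 *	#include <sys/ioctl.h>
 *	#include <xen/privcmd.h>
 *
 *	struct privcmd_hypercall call = {
 *		.op  = __HYPERVISOR_xen_version,
 *		.arg = { 0, 0, 0, 0, 0 },	// 0 == XENVER_version
 *	};
 *	int fd = open("/proc/xen/privcmd", O_RDWR);
 *	long ver = ioctl(fd, IOCTL_PRIVCMD_HYPERCALL, &call);
 *
 * The ioctl's return value is the hypercall's own return value, exactly as
 * produced by privcmd_call() above.
 */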

static void free_page_list(struct list_head *pages)
{
        struct page *p, *n;

        list_for_each_entry_safe(p, n, pages, lru)
                __free_page(p);

        INIT_LIST_HEAD(pages);
}

/*
 * Given an array of items in userspace, return a list of pages
 * containing the data.  If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 */
static int gather_array(struct list_head *pagelist,
                        unsigned nelem, size_t size,
                        void __user *data)
{
        unsigned pageidx;
        void *pagedata;
        int ret;

        /* Oversized elements are not gathered; the caller sees an empty list. */
        if (size > PAGE_SIZE)
                return 0;

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* quiet, gcc */
        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        struct page *page = alloc_page(GFP_KERNEL);

                        ret = -ENOMEM;
                        if (page == NULL)
                                goto fail;

                        pagedata = page_address(page);

                        list_add_tail(&page->lru, pagelist);
                        pageidx = 0;
                }

                ret = -EFAULT;
                if (copy_from_user(pagedata + pageidx, data, size))
                        goto fail;

                data += size;
                pageidx += size;
        }

        ret = 0;

fail:
        return ret;
}
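
/*
 * A worked example of the layout gather_array() produces (illustrative;
 * the numbers assume 4096-byte pages and a 24-byte struct
 * privcmd_mmap_entry of three __u64 fields): the "pageidx >
 * PAGE_SIZE - size" test guarantees no element ever straddles a page
 * boundary, so each page holds floor(4096 / 24) = 170 entries and the
 * final 16 bytes of every page are simply left unused.
 */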

/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 */
static int traverse_pages(unsigned nelem, size_t size,
                          struct list_head *pos,
                          int (*fn)(void *data, void *state),
                          void *state)
{
        void *pagedata;
        unsigned pageidx;
        int ret = 0;

        BUG_ON(size > PAGE_SIZE);

        pageidx = PAGE_SIZE;
        pagedata = NULL;        /* hush, gcc */

        while (nelem--) {
                if (pageidx > PAGE_SIZE-size) {
                        /*
                         * Step to the next page of the gathered list;
                         * "pos" starts at the list head, so the first
                         * iteration lands on the first data page.
                         */
                        struct page *page;
                        pos = pos->next;
                        page = list_entry(pos, struct page, lru);
                        pagedata = page_address(page);
                        pageidx = 0;
                }

                ret = (*fn)(pagedata + pageidx, state);
                if (ret)
                        break;
                pageidx += size;
        }

        return ret;
}

struct mmap_mfn_state {
        unsigned long va;
        struct vm_area_struct *vma;
        domid_t domain;
};

static int mmap_mfn_range(void *data, void *state)
{
        struct privcmd_mmap_entry *msg = data;
        struct mmap_mfn_state *st = state;
        struct vm_area_struct *vma = st->vma;
        int rc;

        /* Do not allow range to wrap the address space. */
        if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
            ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
                return -EINVAL;

        /* Range chunks must be contiguous in va space. */
        if ((msg->va != st->va) ||
            ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
                return -EINVAL;

        rc = xen_remap_domain_mfn_range(vma,
                                        msg->va & PAGE_MASK,
                                        msg->mfn, msg->npages,
                                        vma->vm_page_prot,
                                        st->domain);
        if (rc < 0)
                return rc;

        st->va += msg->npages << PAGE_SHIFT;

        return 0;
}

static long privcmd_ioctl_mmap(void __user *udata)
{
        struct privcmd_mmap mmapcmd;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;
        LIST_HEAD(pagelist);
        struct mmap_mfn_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
                return -EFAULT;

        rc = gather_array(&pagelist,
                          mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                          mmapcmd.entry);

        if (rc || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        {
                struct page *page = list_first_entry(&pagelist,
                                                     struct page, lru);
                struct privcmd_mmap_entry *msg = page_address(page);

                vma = find_vma(mm, msg->va);
                rc = -EINVAL;

                if (!vma || (msg->va != vma->vm_start) ||
                    !privcmd_enforce_singleshot_mapping(vma))
                        goto out_up;
        }

        state.va = vma->vm_start;
        state.vma = vma;
        state.domain = mmapcmd.dom;

        rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
                            &pagelist,
                            mmap_mfn_range, &state);

out_up:
        up_write(&mm->mmap_sem);

out:
        free_page_list(&pagelist);

        return rc;
}
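
/*
 * Illustrative userspace use of IOCTL_PRIVCMD_MMAP (a sketch; "fd",
 * "some_mfn" and "dom" are placeholders):
 *
 *	void *va = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
 *			fd, 0);
 *	struct privcmd_mmap_entry entry = {
 *		.va = (unsigned long)va,
 *		.mfn = some_mfn,	// machine frame owned by "dom"
 *		.npages = 1,
 *	};
 *	struct privcmd_mmap cmd = { .num = 1, .dom = dom, .entry = &entry };
 *	ioctl(fd, IOCTL_PRIVCMD_MMAP, &cmd);
 *
 * The mmap() carves out the VMA (handled by privcmd_mmap() below); the
 * ioctl then populates it with the foreign frames.
 */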

struct mmap_batch_state {
        domid_t domain;
        unsigned long va;
        struct vm_area_struct *vma;
        int err;

        xen_pfn_t __user *user;
};

static int mmap_batch_fn(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;

        if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
                                       st->vma->vm_page_prot, st->domain) < 0) {
                /*
                 * Tag the frame that failed to map so userspace can
                 * recognise it when the array is copied back, and keep
                 * going: one bad frame does not abort the whole batch.
                 */
                *mfnp |= 0xf0000000U;
                st->err++;
        }
        st->va += PAGE_SIZE;

        return 0;
}

static int mmap_return_errors(void *data, void *state)
{
        xen_pfn_t *mfnp = data;
        struct mmap_batch_state *st = state;

        return put_user(*mfnp, st->user++);
}
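
/*
 * Taken together, mmap_batch_fn() and mmap_return_errors() implement the
 * MMAPBATCH error protocol: every frame is attempted, failed entries are
 * marked in place with the 0xf0000000 tag, and if any failed the whole
 * (partially tagged) array is written back over the caller's array so it
 * can retry or report the individual failures.
 */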

static struct vm_operations_struct privcmd_vm_ops;

static long privcmd_ioctl_mmap_batch(void __user *udata)
{
        int ret;
        struct privcmd_mmapbatch m;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long nr_pages;
        LIST_HEAD(pagelist);
        struct mmap_batch_state state;

        if (!xen_initial_domain())
                return -EPERM;

        if (copy_from_user(&m, udata, sizeof(m)))
                return -EFAULT;

        nr_pages = m.num;
        if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
                return -EINVAL;

        ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
                           m.arr);

        if (ret || list_empty(&pagelist))
                goto out;

        down_write(&mm->mmap_sem);

        vma = find_vma(mm, m.addr);
        ret = -EINVAL;
        if (!vma ||
            vma->vm_ops != &privcmd_vm_ops ||
            (m.addr != vma->vm_start) ||
            ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
            !privcmd_enforce_singleshot_mapping(vma)) {
                up_write(&mm->mmap_sem);
                goto out;
        }

        state.domain = m.dom;
        state.vma = vma;
        state.va = m.addr;
        state.err = 0;

        ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                             &pagelist, mmap_batch_fn, &state);

        up_write(&mm->mmap_sem);

        if (state.err > 0) {
                state.user = m.arr;
                ret = traverse_pages(m.num, sizeof(xen_pfn_t),
                                     &pagelist,
                                     mmap_return_errors, &state);
        }

out:
        free_page_list(&pagelist);

        return ret;
}
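
/*
 * Illustrative MMAPBATCH usage from userspace (a sketch; "fd", "dom",
 * "mfn0" and "mfn1" are placeholders).  Unlike IOCTL_PRIVCMD_MMAP, the
 * batch must cover the whole VMA and errors come back in the array itself:
 *
 *	xen_pfn_t frames[2] = { mfn0, mfn1 };
 *	void *va = mmap(NULL, 2 * 4096, PROT_READ | PROT_WRITE,
 *			MAP_SHARED, fd, 0);
 *	struct privcmd_mmapbatch m = {
 *		.num  = 2,
 *		.dom  = dom,
 *		.addr = (unsigned long)va,
 *		.arr  = frames,
 *	};
 *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &m);
 *	for (int i = 0; i < 2; i++)
 *		if (frames[i] & 0xf0000000U)
 *			;	// frame i was tagged as failed
 */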

static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
{
        int ret = -ENOSYS;
        void __user *udata = (void __user *) data;

        switch (cmd) {
        case IOCTL_PRIVCMD_HYPERCALL:
                ret = privcmd_ioctl_hypercall(udata);
                break;

        case IOCTL_PRIVCMD_MMAP:
                ret = privcmd_ioctl_mmap(udata);
                break;

        case IOCTL_PRIVCMD_MMAPBATCH:
                ret = privcmd_ioctl_mmap_batch(udata);
                break;

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
               vma, vma->vm_start, vma->vm_end,
               vmf->pgoff, vmf->virtual_address);

        return VM_FAULT_SIGBUS;
}

static struct vm_operations_struct privcmd_vm_ops = {
        .fault = privcmd_fault
};

static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
        /* Unsupported for auto-translate guests. */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return -ENOSYS;

        /* DONTCOPY is essential for Xen because copy_page_range doesn't know
         * how to recreate these mappings */
        vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
        vma->vm_ops = &privcmd_vm_ops;
        vma->vm_private_data = NULL;

        return 0;
}

/*
 * The xchg() makes this test-and-set atomic: only the first caller sees
 * NULL in vm_private_data, so each VMA can be populated exactly once.
 */
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
{
        return (xchg(&vma->vm_private_data, (void *)1) == NULL);
}
#endif

const struct file_operations privcmd_file_ops = {
        .unlocked_ioctl = privcmd_ioctl,
        .mmap = privcmd_mmap,
};