Merge tag 'modules-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[cascardo/linux.git] / arch / s390 / mm / vmem.c
1 /*
2  *    Copyright IBM Corp. 2006
3  *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
4  */
5
6 #include <linux/bootmem.h>
7 #include <linux/pfn.h>
8 #include <linux/mm.h>
9 #include <linux/module.h>
10 #include <linux/list.h>
11 #include <linux/hugetlb.h>
12 #include <linux/slab.h>
13 #include <asm/pgalloc.h>
14 #include <asm/pgtable.h>
15 #include <asm/setup.h>
16 #include <asm/tlbflush.h>
17 #include <asm/sections.h>
18
19 static DEFINE_MUTEX(vmem_mutex);
20
21 struct memory_segment {
22         struct list_head list;
23         unsigned long start;
24         unsigned long size;
25 };
26
27 static LIST_HEAD(mem_segs);
28
29 static void __ref *vmem_alloc_pages(unsigned int order)
30 {
31         if (slab_is_available())
32                 return (void *)__get_free_pages(GFP_KERNEL, order);
33         return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
34 }
35
36 static inline pud_t *vmem_pud_alloc(void)
37 {
38         pud_t *pud = NULL;
39
40 #ifdef CONFIG_64BIT
41         pud = vmem_alloc_pages(2);
42         if (!pud)
43                 return NULL;
44         clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
45 #endif
46         return pud;
47 }
48
49 static inline pmd_t *vmem_pmd_alloc(void)
50 {
51         pmd_t *pmd = NULL;
52
53 #ifdef CONFIG_64BIT
54         pmd = vmem_alloc_pages(2);
55         if (!pmd)
56                 return NULL;
57         clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
58 #endif
59         return pmd;
60 }
61
62 static pte_t __ref *vmem_pte_alloc(unsigned long address)
63 {
64         pte_t *pte;
65
66         if (slab_is_available())
67                 pte = (pte_t *) page_table_alloc(&init_mm, address);
68         else
69                 pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
70         if (!pte)
71                 return NULL;
72         clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
73                     PTRS_PER_PTE * sizeof(pte_t));
74         return pte;
75 }
76
77 /*
78  * Add a physical memory range to the 1:1 mapping.
79  */
80 static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
81 {
82         unsigned long end = start + size;
83         unsigned long address = start;
84         pgd_t *pg_dir;
85         pud_t *pu_dir;
86         pmd_t *pm_dir;
87         pte_t *pt_dir;
88         pte_t  pte;
89         int ret = -ENOMEM;
90
91         while (address < end) {
92                 pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
93                 pg_dir = pgd_offset_k(address);
94                 if (pgd_none(*pg_dir)) {
95                         pu_dir = vmem_pud_alloc();
96                         if (!pu_dir)
97                                 goto out;
98                         pgd_populate(&init_mm, pg_dir, pu_dir);
99                 }
100                 pu_dir = pud_offset(pg_dir, address);
101 #if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
102                 if (MACHINE_HAS_EDAT2 && pud_none(*pu_dir) && address &&
103                     !(address & ~PUD_MASK) && (address + PUD_SIZE <= end)) {
104                         pte_val(pte) |= _REGION3_ENTRY_LARGE;
105                         pte_val(pte) |= _REGION_ENTRY_TYPE_R3;
106                         pud_val(*pu_dir) = pte_val(pte);
107                         address += PUD_SIZE;
108                         continue;
109                 }
110 #endif
111                 if (pud_none(*pu_dir)) {
112                         pm_dir = vmem_pmd_alloc();
113                         if (!pm_dir)
114                                 goto out;
115                         pud_populate(&init_mm, pu_dir, pm_dir);
116                 }
117                 pm_dir = pmd_offset(pu_dir, address);
118 #if defined(CONFIG_64BIT) && !defined(CONFIG_DEBUG_PAGEALLOC)
119                 if (MACHINE_HAS_EDAT1 && pmd_none(*pm_dir) && address &&
120                     !(address & ~PMD_MASK) && (address + PMD_SIZE <= end)) {
121                         pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
122                         pmd_val(*pm_dir) = pte_val(pte);
123                         address += PMD_SIZE;
124                         continue;
125                 }
126 #endif
127                 if (pmd_none(*pm_dir)) {
128                         pt_dir = vmem_pte_alloc(address);
129                         if (!pt_dir)
130                                 goto out;
131                         pmd_populate(&init_mm, pm_dir, pt_dir);
132                 }
133
134                 pt_dir = pte_offset_kernel(pm_dir, address);
135                 *pt_dir = pte;
136                 address += PAGE_SIZE;
137         }
138         ret = 0;
139 out:
140         flush_tlb_kernel_range(start, end);
141         return ret;
142 }
143
144 /*
145  * Remove a physical memory range from the 1:1 mapping.
146  * Currently only invalidates page table entries.
147  */
148 static void vmem_remove_range(unsigned long start, unsigned long size)
149 {
150         unsigned long end = start + size;
151         unsigned long address = start;
152         pgd_t *pg_dir;
153         pud_t *pu_dir;
154         pmd_t *pm_dir;
155         pte_t *pt_dir;
156         pte_t  pte;
157
158         pte_val(pte) = _PAGE_TYPE_EMPTY;
159         while (address < end) {
160                 pg_dir = pgd_offset_k(address);
161                 if (pgd_none(*pg_dir)) {
162                         address += PGDIR_SIZE;
163                         continue;
164                 }
165                 pu_dir = pud_offset(pg_dir, address);
166                 if (pud_none(*pu_dir)) {
167                         address += PUD_SIZE;
168                         continue;
169                 }
170                 if (pud_large(*pu_dir)) {
171                         pud_clear(pu_dir);
172                         address += PUD_SIZE;
173                         continue;
174                 }
175                 pm_dir = pmd_offset(pu_dir, address);
176                 if (pmd_none(*pm_dir)) {
177                         address += PMD_SIZE;
178                         continue;
179                 }
180                 if (pmd_large(*pm_dir)) {
181                         pmd_clear(pm_dir);
182                         address += PMD_SIZE;
183                         continue;
184                 }
185                 pt_dir = pte_offset_kernel(pm_dir, address);
186                 *pt_dir = pte;
187                 address += PAGE_SIZE;
188         }
189         flush_tlb_kernel_range(start, end);
190 }
191
192 /*
193  * Add a backed mem_map array to the virtual mem_map array.
194  */
195 int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
196 {
197         unsigned long address, start_addr, end_addr;
198         pgd_t *pg_dir;
199         pud_t *pu_dir;
200         pmd_t *pm_dir;
201         pte_t *pt_dir;
202         pte_t  pte;
203         int ret = -ENOMEM;
204
205         start_addr = (unsigned long) start;
206         end_addr = (unsigned long) (start + nr);
207
208         for (address = start_addr; address < end_addr;) {
209                 pg_dir = pgd_offset_k(address);
210                 if (pgd_none(*pg_dir)) {
211                         pu_dir = vmem_pud_alloc();
212                         if (!pu_dir)
213                                 goto out;
214                         pgd_populate(&init_mm, pg_dir, pu_dir);
215                 }
216
217                 pu_dir = pud_offset(pg_dir, address);
218                 if (pud_none(*pu_dir)) {
219                         pm_dir = vmem_pmd_alloc();
220                         if (!pm_dir)
221                                 goto out;
222                         pud_populate(&init_mm, pu_dir, pm_dir);
223                 }
224
225                 pm_dir = pmd_offset(pu_dir, address);
226                 if (pmd_none(*pm_dir)) {
227 #ifdef CONFIG_64BIT
228                         /* Use 1MB frames for vmemmap if available. We always
229                          * use large frames even if they are only partially
230                          * used.
231                          * Otherwise we would have also page tables since
232                          * vmemmap_populate gets called for each section
233                          * separately. */
234                         if (MACHINE_HAS_EDAT1) {
235                                 void *new_page;
236
237                                 new_page = vmemmap_alloc_block(PMD_SIZE, node);
238                                 if (!new_page)
239                                         goto out;
240                                 pte = mk_pte_phys(__pa(new_page), PAGE_RW);
241                                 pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
242                                 pmd_val(*pm_dir) = pte_val(pte);
243                                 address = (address + PMD_SIZE) & PMD_MASK;
244                                 continue;
245                         }
246 #endif
247                         pt_dir = vmem_pte_alloc(address);
248                         if (!pt_dir)
249                                 goto out;
250                         pmd_populate(&init_mm, pm_dir, pt_dir);
251                 } else if (pmd_large(*pm_dir)) {
252                         address = (address + PMD_SIZE) & PMD_MASK;
253                         continue;
254                 }
255
256                 pt_dir = pte_offset_kernel(pm_dir, address);
257                 if (pte_none(*pt_dir)) {
258                         unsigned long new_page;
259
260                         new_page =__pa(vmem_alloc_pages(0));
261                         if (!new_page)
262                                 goto out;
263                         pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
264                         *pt_dir = pte;
265                 }
266                 address += PAGE_SIZE;
267         }
268         memset(start, 0, nr * sizeof(struct page));
269         ret = 0;
270 out:
271         flush_tlb_kernel_range(start_addr, end_addr);
272         return ret;
273 }
274
275 /*
276  * Add memory segment to the segment list if it doesn't overlap with
277  * an already present segment.
278  */
279 static int insert_memory_segment(struct memory_segment *seg)
280 {
281         struct memory_segment *tmp;
282
283         if (seg->start + seg->size > VMEM_MAX_PHYS ||
284             seg->start + seg->size < seg->start)
285                 return -ERANGE;
286
287         list_for_each_entry(tmp, &mem_segs, list) {
288                 if (seg->start >= tmp->start + tmp->size)
289                         continue;
290                 if (seg->start + seg->size <= tmp->start)
291                         continue;
292                 return -ENOSPC;
293         }
294         list_add(&seg->list, &mem_segs);
295         return 0;
296 }
297
298 /*
299  * Remove memory segment from the segment list.
300  */
301 static void remove_memory_segment(struct memory_segment *seg)
302 {
303         list_del(&seg->list);
304 }
305
306 static void __remove_shared_memory(struct memory_segment *seg)
307 {
308         remove_memory_segment(seg);
309         vmem_remove_range(seg->start, seg->size);
310 }
311
312 int vmem_remove_mapping(unsigned long start, unsigned long size)
313 {
314         struct memory_segment *seg;
315         int ret;
316
317         mutex_lock(&vmem_mutex);
318
319         ret = -ENOENT;
320         list_for_each_entry(seg, &mem_segs, list) {
321                 if (seg->start == start && seg->size == size)
322                         break;
323         }
324
325         if (seg->start != start || seg->size != size)
326                 goto out;
327
328         ret = 0;
329         __remove_shared_memory(seg);
330         kfree(seg);
331 out:
332         mutex_unlock(&vmem_mutex);
333         return ret;
334 }
335
336 int vmem_add_mapping(unsigned long start, unsigned long size)
337 {
338         struct memory_segment *seg;
339         int ret;
340
341         mutex_lock(&vmem_mutex);
342         ret = -ENOMEM;
343         seg = kzalloc(sizeof(*seg), GFP_KERNEL);
344         if (!seg)
345                 goto out;
346         seg->start = start;
347         seg->size = size;
348
349         ret = insert_memory_segment(seg);
350         if (ret)
351                 goto out_free;
352
353         ret = vmem_add_mem(start, size, 0);
354         if (ret)
355                 goto out_remove;
356         goto out;
357
358 out_remove:
359         __remove_shared_memory(seg);
360 out_free:
361         kfree(seg);
362 out:
363         mutex_unlock(&vmem_mutex);
364         return ret;
365 }
366
367 /*
368  * map whole physical memory to virtual memory (identity mapping)
369  * we reserve enough space in the vmalloc area for vmemmap to hotplug
370  * additional memory segments.
371  */
372 void __init vmem_map_init(void)
373 {
374         unsigned long ro_start, ro_end;
375         unsigned long start, end;
376         int i;
377
378         ro_start = PFN_ALIGN((unsigned long)&_stext);
379         ro_end = (unsigned long)&_eshared & PAGE_MASK;
380         for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
381                 if (memory_chunk[i].type == CHUNK_CRASHK ||
382                     memory_chunk[i].type == CHUNK_OLDMEM)
383                         continue;
384                 start = memory_chunk[i].addr;
385                 end = memory_chunk[i].addr + memory_chunk[i].size;
386                 if (start >= ro_end || end <= ro_start)
387                         vmem_add_mem(start, end - start, 0);
388                 else if (start >= ro_start && end <= ro_end)
389                         vmem_add_mem(start, end - start, 1);
390                 else if (start >= ro_start) {
391                         vmem_add_mem(start, ro_end - start, 1);
392                         vmem_add_mem(ro_end, end - ro_end, 0);
393                 } else if (end < ro_end) {
394                         vmem_add_mem(start, ro_start - start, 0);
395                         vmem_add_mem(ro_start, end - ro_start, 1);
396                 } else {
397                         vmem_add_mem(start, ro_start - start, 0);
398                         vmem_add_mem(ro_start, ro_end - ro_start, 1);
399                         vmem_add_mem(ro_end, end - ro_end, 0);
400                 }
401         }
402 }
403
404 /*
405  * Convert memory chunk array to a memory segment list so there is a single
406  * list that contains both r/w memory and shared memory segments.
407  */
408 static int __init vmem_convert_memory_chunk(void)
409 {
410         struct memory_segment *seg;
411         int i;
412
413         mutex_lock(&vmem_mutex);
414         for (i = 0; i < MEMORY_CHUNKS; i++) {
415                 if (!memory_chunk[i].size)
416                         continue;
417                 if (memory_chunk[i].type == CHUNK_CRASHK ||
418                     memory_chunk[i].type == CHUNK_OLDMEM)
419                         continue;
420                 seg = kzalloc(sizeof(*seg), GFP_KERNEL);
421                 if (!seg)
422                         panic("Out of memory...\n");
423                 seg->start = memory_chunk[i].addr;
424                 seg->size = memory_chunk[i].size;
425                 insert_memory_segment(seg);
426         }
427         mutex_unlock(&vmem_mutex);
428         return 0;
429 }
430
431 core_initcall(vmem_convert_memory_chunk);