/* arch/sparc64/mm/tsb.c
 *
 * Copyright (C) 2006, 2008 David S. Miller <davem@davemloft.net>
 */

#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/tsb.h>
#include <asm/tlb.h>
#include <asm/oplib.h>

extern struct tsb swapper_tsb[KERNEL_TSB_NENTRIES];

static inline unsigned long tsb_hash(unsigned long vaddr, unsigned long hash_shift, unsigned long nentries)
{
        vaddr >>= hash_shift;
        return vaddr & (nentries - 1);
}

static inline int tag_compare(unsigned long tag, unsigned long vaddr)
{
        return (tag == (vaddr >> 22));
}
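
/* tag_compare() treats an entry's tag as bits 63:22 of the mapped virtual
 * address, while tsb_hash() picks the slot from the page number.  A short
 * illustration (the address below is an arbitrary assumed value, not taken
 * from a real system):
 *
 *      vaddr = 0x400000
 *      tag   = vaddr >> 22                          == 0x1
 *      slot  = tsb_hash(vaddr, PAGE_SHIFT, n)       == 0x200 & (n - 1)
 *
 * flush_tsb_kernel_range_scan() below inverts this, rebuilding a virtual
 * address from the slot index and the stored tag.
 */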

static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end)
{
        unsigned long idx;

        for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) {
                struct tsb *ent = &swapper_tsb[idx];
                unsigned long match = idx << 13;

                match |= (ent->tag << 22);
                if (match >= start && match < end)
                        ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
}

/* TSB flushes need only occur on the processor initiating the address
 * space modification, not on each cpu the address space has run on.
 * Only the TLB flush needs that treatment.
 */

void flush_tsb_kernel_range(unsigned long start, unsigned long end)
{
        unsigned long v;

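        /* For very large ranges it is cheaper to walk the whole kernel TSB
         * once than to probe it page by page, so fall back to a full scan
         * when the range covers at least twice as many pages as the TSB
         * has entries.
         */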
        if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES)
                return flush_tsb_kernel_range_scan(start, end);

        for (v = start; v < end; v += PAGE_SIZE) {
                unsigned long hash = tsb_hash(v, PAGE_SHIFT,
                                              KERNEL_TSB_NENTRIES);
                struct tsb *ent = &swapper_tsb[hash];

                if (tag_compare(ent->tag, v))
                        ent->tag = (1UL << TSB_TAG_INVALID_BIT);
        }
}

static void __flush_tsb_one_entry(unsigned long tsb, unsigned long v,
                                  unsigned long hash_shift,
                                  unsigned long nentries)
{
        unsigned long tag, ent, hash;

        v &= ~0x1UL;
        hash = tsb_hash(v, hash_shift, nentries);
        ent = tsb + (hash * sizeof(struct tsb));
        tag = (v >> 22UL);

        tsb_flush(ent, tag);
}

static void __flush_tsb_one(struct tlb_batch *tb, unsigned long hash_shift,
                            unsigned long tsb, unsigned long nentries)
{
        unsigned long i;

        for (i = 0; i < tb->tlb_nr; i++)
                __flush_tsb_one_entry(tsb, tb->vaddrs[i], hash_shift, nentries);
}

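/* Invalidate, for the mm that owns the batch, the TSB entries covering
 * every address queued in @tb.  For non-huge batches the 8K (base) TSB is
 * flushed; for huge batches the huge-page TSB is flushed if it has been
 * allocated.
 */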
void flush_tsb_user(struct tlb_batch *tb)
{
        struct mm_struct *mm = tb->mm;
        unsigned long nentries, base, flags;

        spin_lock_irqsave(&mm->context.lock, flags);

        if (!tb->huge) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one(tb, PAGE_SHIFT, base, nentries);
        }
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (tb->huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one(tb, REAL_HPAGE_SHIFT, base, nentries);
        }
#endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
}

void flush_tsb_user_page(struct mm_struct *mm, unsigned long vaddr, bool huge)
{
        unsigned long nentries, base, flags;

        spin_lock_irqsave(&mm->context.lock, flags);

        if (!huge) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_BASE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_BASE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one_entry(base, vaddr, PAGE_SHIFT, nentries);
        }
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (huge && mm->context.tsb_block[MM_TSB_HUGE].tsb) {
                base = (unsigned long) mm->context.tsb_block[MM_TSB_HUGE].tsb;
                nentries = mm->context.tsb_block[MM_TSB_HUGE].tsb_nentries;
                if (tlb_type == cheetah_plus || tlb_type == hypervisor)
                        base = __pa(base);
                __flush_tsb_one_entry(base, vaddr, REAL_HPAGE_SHIFT, nentries);
        }
#endif
        spin_unlock_irqrestore(&mm->context.lock, flags);
}

#define HV_PGSZ_IDX_BASE        HV_PGSZ_IDX_8K
#define HV_PGSZ_MASK_BASE       HV_PGSZ_MASK_8K

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
#define HV_PGSZ_IDX_HUGE        HV_PGSZ_IDX_4MB
#define HV_PGSZ_MASK_HUGE       HV_PGSZ_MASK_4MB
#endif

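/* Recompute the cached TSB register value, the virtual mapping PTE used by
 * older chips, and (on sun4v) the hypervisor TSB descriptor for the TSB at
 * @tsb_idx.  The backing store must already be allocated, physically
 * contiguous, and aligned to @tsb_bytes.
 */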
static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_idx, unsigned long tsb_bytes)
{
        unsigned long tsb_reg, base, tsb_paddr;
        unsigned long page_sz, tte;

        mm->context.tsb_block[tsb_idx].tsb_nentries =
                tsb_bytes / sizeof(struct tsb);

        switch (tsb_idx) {
        case MM_TSB_BASE:
                base = TSBMAP_8K_BASE;
                break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        case MM_TSB_HUGE:
                base = TSBMAP_4M_BASE;
                break;
#endif
        default:
                BUG();
        }

        tte = pgprot_val(PAGE_KERNEL_LOCKED);
        tsb_paddr = __pa(mm->context.tsb_block[tsb_idx].tsb);
        BUG_ON(tsb_paddr & (tsb_bytes - 1UL));

        /* Use the smallest page size that can map the whole TSB
         * in one TLB entry.
         */
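        /* The low three bits of tsb_reg encode the TSB size itself
         * (0x0 == 8K ... 0x7 == 1MB); tsb_grow() and tsb_destroy_one()
         * read them back as the tsb_caches[] index.
         */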
        switch (tsb_bytes) {
        case 8192 << 0:
                tsb_reg = 0x0UL;
#ifdef DCACHE_ALIASING_POSSIBLE
                base += (tsb_paddr & 8192);
#endif
                page_sz = 8192;
                break;

        case 8192 << 1:
                tsb_reg = 0x1UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 2:
                tsb_reg = 0x2UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 3:
                tsb_reg = 0x3UL;
                page_sz = 64 * 1024;
                break;

        case 8192 << 4:
                tsb_reg = 0x4UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 5:
                tsb_reg = 0x5UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 6:
                tsb_reg = 0x6UL;
                page_sz = 512 * 1024;
                break;

        case 8192 << 7:
                tsb_reg = 0x7UL;
                page_sz = 4 * 1024 * 1024;
                break;

        default:
                printk(KERN_ERR "TSB[%s:%d]: Impossible TSB size %lu, killing process.\n",
                       current->comm, current->pid, tsb_bytes);
                do_exit(SIGSEGV);
        }
        tte |= pte_sz_bits(page_sz);

        if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                /* Physical mapping, no locked TLB entry for TSB.  */
                tsb_reg |= tsb_paddr;

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = 0;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = 0;
        } else {
                tsb_reg |= base;
                tsb_reg |= (tsb_paddr & (page_sz - 1UL));
                tte |= (tsb_paddr & ~(page_sz - 1UL));

                mm->context.tsb_block[tsb_idx].tsb_reg_val = tsb_reg;
                mm->context.tsb_block[tsb_idx].tsb_map_vaddr = base;
                mm->context.tsb_block[tsb_idx].tsb_map_pte = tte;
        }

        /* Setup the Hypervisor TSB descriptor.  */
        if (tlb_type == hypervisor) {
                struct hv_tsb_descr *hp = &mm->context.tsb_descr[tsb_idx];

                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_idx = HV_PGSZ_IDX_BASE;
                        break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
                case MM_TSB_HUGE:
                        hp->pgsz_idx = HV_PGSZ_IDX_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->assoc = 1;
                hp->num_ttes = tsb_bytes / 16;
                hp->ctx_idx = 0;
                switch (tsb_idx) {
                case MM_TSB_BASE:
                        hp->pgsz_mask = HV_PGSZ_MASK_BASE;
                        break;
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
                case MM_TSB_HUGE:
                        hp->pgsz_mask = HV_PGSZ_MASK_HUGE;
                        break;
#endif
                default:
                        BUG();
                }
                hp->tsb_base = tsb_paddr;
                hp->resv = 0;
        }
}

struct kmem_cache *pgtable_cache __read_mostly;

static struct kmem_cache *tsb_caches[8] __read_mostly;

static const char *tsb_cache_names[8] = {
        "tsb_8KB",
        "tsb_16KB",
        "tsb_32KB",
        "tsb_64KB",
        "tsb_128KB",
        "tsb_256KB",
        "tsb_512KB",
        "tsb_1MB",
};

void __init pgtable_cache_init(void)
{
        unsigned long i;

        pgtable_cache = kmem_cache_create("pgtable_cache",
                                          PAGE_SIZE, PAGE_SIZE,
                                          0,
                                          _clear_page);
        if (!pgtable_cache) {
                prom_printf("pgtable_cache_init(): Could not create!\n");
                prom_halt();
        }

        for (i = 0; i < ARRAY_SIZE(tsb_cache_names); i++) {
                unsigned long size = 8192 << i;
                const char *name = tsb_cache_names[i];

                tsb_caches[i] = kmem_cache_create(name,
                                                  size, size,
                                                  0, NULL);
                if (!tsb_caches[i]) {
                        prom_printf("Could not create %s cache\n", name);
                        prom_halt();
                }
        }
}

int sysctl_tsb_ratio = -2;

static unsigned long tsb_size_to_rss_limit(unsigned long new_size)
{
        unsigned long num_ents = (new_size / sizeof(struct tsb));

        if (sysctl_tsb_ratio < 0)
                return num_ents - (num_ents >> -sysctl_tsb_ratio);
        else
                return num_ents + (num_ents >> sysctl_tsb_ratio);
}
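
/* Worked example (illustrative, assuming struct tsb is 16 bytes, i.e. a tag
 * word plus a PTE word): with the default sysctl_tsb_ratio of -2, an 8K TSB
 * holds 8192 / 16 = 512 entries and its RSS limit is 512 - (512 >> 2) = 384,
 * i.e. the 3/4-of-capacity trigger described in the comment below.
 */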

/* When the RSS of an address space exceeds tsb_rss_limit for a TSB,
 * do_sparc64_fault() invokes this routine to try and grow it.
 *
 * When we reach the maximum TSB size supported, we stick ~0UL into
 * tsb_rss_limit for that TSB so the grow checks in do_sparc64_fault()
 * will not trigger any longer.
 *
 * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
 * must be 512K aligned.  It also must be physically contiguous, so we
 * cannot use vmalloc().
 *
 * The idea here is to grow the TSB when the RSS of the process approaches
 * the number of entries that the current TSB can hold at once.  Currently,
 * we trigger when the RSS hits 3/4 of the TSB capacity.
 */
void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
{
        unsigned long max_tsb_size = 1 * 1024 * 1024;
        unsigned long new_size, old_size, flags;
        struct tsb *old_tsb, *new_tsb;
        unsigned long new_cache_index, old_cache_index;
        unsigned long new_rss_limit;
        gfp_t gfp_flags;

        if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
                max_tsb_size = (PAGE_SIZE << MAX_ORDER);

        new_cache_index = 0;
        for (new_size = 8192; new_size < max_tsb_size; new_size <<= 1UL) {
                new_rss_limit = tsb_size_to_rss_limit(new_size);
                if (new_rss_limit > rss)
                        break;
                new_cache_index++;
        }

        if (new_size == max_tsb_size)
                new_rss_limit = ~0UL;

retry_tsb_alloc:
        gfp_flags = GFP_KERNEL;
        if (new_size > (PAGE_SIZE * 2))
                gfp_flags |= __GFP_NOWARN | __GFP_NORETRY;

        new_tsb = kmem_cache_alloc_node(tsb_caches[new_cache_index],
                                        gfp_flags, numa_node_id());
        if (unlikely(!new_tsb)) {
                /* Not being able to fork due to a high-order TSB
                 * allocation failure is very bad behavior.  Just back
                 * down to a 0-order allocation and force no TSB
                 * growing for this address space.
                 */
                if (mm->context.tsb_block[tsb_index].tsb == NULL &&
                    new_cache_index > 0) {
                        new_cache_index = 0;
                        new_size = 8192;
                        new_rss_limit = ~0UL;
                        goto retry_tsb_alloc;
                }

                /* If we failed on a TSB grow, we are under serious
                 * memory pressure so don't try to grow any more.
                 */
                if (mm->context.tsb_block[tsb_index].tsb != NULL)
                        mm->context.tsb_block[tsb_index].tsb_rss_limit = ~0UL;
                return;
        }

        /* Mark all tags as invalid.  */
        tsb_init(new_tsb, new_size);

        /* Ok, we are about to commit the changes.  If we are
         * growing an existing TSB the locking is very tricky,
         * so WATCH OUT!
         *
         * We have to hold mm->context.lock while committing to the
         * new TSB, this synchronizes us with processors in
         * flush_tsb_user() and switch_mm() for this address space.
         *
         * But even with that lock held, processors run asynchronously
         * accessing the old TSB via TLB miss handling.  This is OK
         * because those actions are just propagating state from the
         * Linux page tables into the TSB, page table mappings are not
         * being changed.  If a real fault occurs, the processor will
         * synchronize with us when it hits flush_tsb_user(), this is
         * also true for the case where vmscan is modifying the page
         * tables.  The only thing we need to be careful with is to
         * skip any locked TSB entries during copy_tsb().
         *
         * When we finish committing to the new TSB, we have to drop
         * the lock and ask all other cpus running this address space
         * to run tsb_context_switch() to see the new TSB table.
         */
        spin_lock_irqsave(&mm->context.lock, flags);

        old_tsb = mm->context.tsb_block[tsb_index].tsb;
        old_cache_index =
                (mm->context.tsb_block[tsb_index].tsb_reg_val & 0x7UL);
        old_size = (mm->context.tsb_block[tsb_index].tsb_nentries *
                    sizeof(struct tsb));

        /* Handle multiple threads trying to grow the TSB at the same time.
         * One will get in here first, and bump the size and the RSS limit.
         * The others will get in here next and hit this check.
         */
        if (unlikely(old_tsb &&
                     (rss < mm->context.tsb_block[tsb_index].tsb_rss_limit))) {
                spin_unlock_irqrestore(&mm->context.lock, flags);

                kmem_cache_free(tsb_caches[new_cache_index], new_tsb);
                return;
        }

        mm->context.tsb_block[tsb_index].tsb_rss_limit = new_rss_limit;

        if (old_tsb) {
                extern void copy_tsb(unsigned long old_tsb_base,
                                     unsigned long old_tsb_size,
                                     unsigned long new_tsb_base,
                                     unsigned long new_tsb_size);
                unsigned long old_tsb_base = (unsigned long) old_tsb;
                unsigned long new_tsb_base = (unsigned long) new_tsb;

                if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
                        old_tsb_base = __pa(old_tsb_base);
                        new_tsb_base = __pa(new_tsb_base);
                }
                copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
        }

        mm->context.tsb_block[tsb_index].tsb = new_tsb;
        setup_tsb_params(mm, tsb_index, new_size);

        spin_unlock_irqrestore(&mm->context.lock, flags);

        /* If old_tsb is NULL, we're being invoked for the first time
         * from init_new_context().
         */
        if (old_tsb) {
                /* Reload it on the local cpu.  */
                tsb_context_switch(mm);

                /* Now force other processors to do the same.  */
                preempt_disable();
                smp_tsb_sync(mm);
                preempt_enable();

                /* Now it is safe to free the old tsb.  */
                kmem_cache_free(tsb_caches[old_cache_index], old_tsb);
        }
}

int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
        unsigned long mm_rss = get_mm_rss(mm);
#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        unsigned long saved_hugetlb_pte_count;
        unsigned long saved_thp_pte_count;
#endif
        unsigned int i;

        spin_lock_init(&mm->context.lock);

        mm->context.sparc64_ctx_val = 0UL;

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        /* We reset them to zero because the fork() page copying
         * will re-increment the counters as the parent PTEs are
         * copied into the child address space.
         */
        saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
        saved_thp_pte_count = mm->context.thp_pte_count;
        mm->context.hugetlb_pte_count = 0;
        mm->context.thp_pte_count = 0;

        mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
#endif

        /* copy_mm() copies over the parent's mm_struct before calling
         * us, so we need to zero out the TSB pointer or else tsb_grow()
         * will be confused and think there is an older TSB to free up.
         */
        for (i = 0; i < MM_NUM_TSBS; i++)
                mm->context.tsb_block[i].tsb = NULL;

        /* If this is fork, inherit the parent's TSB size.  We would
         * grow it to that size on the first page fault anyway.
         */
        tsb_grow(mm, MM_TSB_BASE, mm_rss);

#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
                tsb_grow(mm, MM_TSB_HUGE,
                         (saved_hugetlb_pte_count + saved_thp_pte_count) *
                         REAL_HPAGE_PER_HPAGE);
#endif

        if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
                return -ENOMEM;

        return 0;
}

static void tsb_destroy_one(struct tsb_config *tp)
{
        unsigned long cache_index;

        if (!tp->tsb)
                return;
        cache_index = tp->tsb_reg_val & 0x7UL;
        kmem_cache_free(tsb_caches[cache_index], tp->tsb);
        tp->tsb = NULL;
        tp->tsb_reg_val = 0UL;
}

void destroy_context(struct mm_struct *mm)
{
        unsigned long flags, i;

        for (i = 0; i < MM_NUM_TSBS; i++)
                tsb_destroy_one(&mm->context.tsb_block[i]);

        spin_lock_irqsave(&ctx_alloc_lock, flags);

        if (CTX_VALID(mm->context)) {
                unsigned long nr = CTX_NRBITS(mm->context);
                mmu_context_bmap[nr>>6] &= ~(1UL << (nr & 63));
        }

        spin_unlock_irqrestore(&ctx_alloc_lock, flags);
}