Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-next
[cascardo/linux.git] / arch / sparc / mm / init_64.c
index d6de935..6b64379 100644 (file)
@@ -354,7 +354,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
        if (mm->context.huge_pte_count && is_hugetlb_pte(pte))
-               __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+               __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
                                        address, pte_val(pte));
        else
 #endif
@@ -1557,6 +1557,96 @@ unsigned long __init find_ecache_flush_span(unsigned long size)
        return ~0UL;
 }
 
+unsigned long PAGE_OFFSET;
+EXPORT_SYMBOL(PAGE_OFFSET);
+
+static void __init page_offset_shift_patch_one(unsigned int *insn, unsigned long phys_bits)
+{
+       unsigned long final_shift;
+       unsigned int val = *insn;
+       unsigned int cnt;
+
+       /* We are patching in ilog2(max_supported_phys_address), and
+        * we are doing so in a manner similar to a relocation addend.
+        * That is, we are adding the shift value to whatever value
+        * is in the shift instruction count field already.
+        */
+       cnt = (val & 0x3f);
+       val &= ~0x3f;
+
+       /* If we are trying to shift >= 64 bits, clear the destination
+        * register.  This can happen when phys_bits ends up being equal
+        * to MAX_PHYS_ADDRESS_BITS.
+        */
+       final_shift = (cnt + (64 - phys_bits));
+       if (final_shift >= 64) {
+               unsigned int rd = (val >> 25) & 0x1f;
+
+               val = 0x80100000 | (rd << 25);
+       } else {
+               val |= final_shift;
+       }
+       *insn = val;
+
+       __asm__ __volatile__("flush     %0"
+                            : /* no outputs */
+                            : "r" (insn));
+}
+
+static void __init page_offset_shift_patch(unsigned long phys_bits)
+{
+       extern unsigned int __page_offset_shift_patch;
+       extern unsigned int __page_offset_shift_patch_end;
+       unsigned int *p;
+
+       p = &__page_offset_shift_patch;
+       while (p < &__page_offset_shift_patch_end) {
+               unsigned int *insn = (unsigned int *)(unsigned long)*p;
+
+               page_offset_shift_patch_one(insn, phys_bits);
+
+               p++;
+       }
+}
+
+static void __init setup_page_offset(void)
+{
+       unsigned long max_phys_bits = 40;
+
+       if (tlb_type == cheetah || tlb_type == cheetah_plus) {
+               max_phys_bits = 42;
+       } else if (tlb_type == hypervisor) {
+               switch (sun4v_chip_type) {
+               case SUN4V_CHIP_NIAGARA1:
+               case SUN4V_CHIP_NIAGARA2:
+                       max_phys_bits = 39;
+                       break;
+               case SUN4V_CHIP_NIAGARA3:
+                       max_phys_bits = 43;
+                       break;
+               case SUN4V_CHIP_NIAGARA4:
+               case SUN4V_CHIP_NIAGARA5:
+               case SUN4V_CHIP_SPARC64X:
+               default:
+                       max_phys_bits = 47;
+                       break;
+               }
+       }
+
+       if (max_phys_bits > MAX_PHYS_ADDRESS_BITS) {
+               prom_printf("MAX_PHYS_ADDRESS_BITS is too small, need %lu\n",
+                           max_phys_bits);
+               prom_halt();
+       }
+
+       PAGE_OFFSET = PAGE_OFFSET_BY_BITS(max_phys_bits);
+
+       pr_info("PAGE_OFFSET is 0x%016lx (max_phys_bits == %lu)\n",
+               PAGE_OFFSET, max_phys_bits);
+
+       page_offset_shift_patch(max_phys_bits);
+}
+
 static void __init tsb_phys_patch(void)
 {
        struct tsb_ldquad_phys_patch_entry *pquad;
@@ -1722,7 +1812,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
 #ifndef CONFIG_DEBUG_PAGEALLOC
        if (cpu_pgsz_mask & HV_PGSZ_MASK_256MB) {
                kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
-                       0xfffff80000000000UL;
+                       PAGE_OFFSET;
                kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
                                           _PAGE_P_4V | _PAGE_W_4V);
        } else {
@@ -1731,7 +1821,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
 
        if (cpu_pgsz_mask & HV_PGSZ_MASK_2GB) {
                kern_linear_pte_xor[2] = (_PAGE_VALID | _PAGE_SZ2GB_4V) ^
-                       0xfffff80000000000UL;
+                       PAGE_OFFSET;
                kern_linear_pte_xor[2] |= (_PAGE_CP_4V | _PAGE_CV_4V |
                                           _PAGE_P_4V | _PAGE_W_4V);
        } else {
@@ -1740,7 +1830,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
 
        if (cpu_pgsz_mask & HV_PGSZ_MASK_16GB) {
                kern_linear_pte_xor[3] = (_PAGE_VALID | _PAGE_SZ16GB_4V) ^
-                       0xfffff80000000000UL;
+                       PAGE_OFFSET;
                kern_linear_pte_xor[3] |= (_PAGE_CP_4V | _PAGE_CV_4V |
                                           _PAGE_P_4V | _PAGE_W_4V);
        } else {
@@ -1752,7 +1842,7 @@ static void __init sun4v_linear_pte_xor_finalize(void)
 /* paging_init() sets up the page tables */
 
 static unsigned long last_valid_pfn;
-pgd_t swapper_pg_dir[2048];
+pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
 static void sun4u_pgprot_init(void);
 static void sun4v_pgprot_init(void);
@@ -1763,6 +1853,8 @@ void __init paging_init(void)
        unsigned long real_end, i;
        int node;
 
+       setup_page_offset();
+
        /* These build time checkes make sure that the dcache_dirty_cpu()
         * page->flags usage will work.
         *
@@ -2261,10 +2353,10 @@ static void __init sun4u_pgprot_init(void)
                     __ACCESS_BITS_4U | _PAGE_E_4U);
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-       kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
+       kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
 #else
        kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
-               0xfffff80000000000UL;
+               PAGE_OFFSET;
 #endif
        kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
                                   _PAGE_P_4U | _PAGE_W_4U);
@@ -2308,10 +2400,10 @@ static void __init sun4v_pgprot_init(void)
        _PAGE_CACHE = _PAGE_CACHE_4V;
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-       kern_linear_pte_xor[0] = _PAGE_VALID ^ 0xfffff80000000000UL;
+       kern_linear_pte_xor[0] = _PAGE_VALID ^ PAGE_OFFSET;
 #else
        kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
-               0xfffff80000000000UL;
+               PAGE_OFFSET;
 #endif
        kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
                                   _PAGE_P_4V | _PAGE_W_4V);
@@ -2455,53 +2547,13 @@ void __flush_tlb_all(void)
                             : : "r" (pstate));
 }
 
-static pte_t *get_from_cache(struct mm_struct *mm)
-{
-       struct page *page;
-       pte_t *ret;
-
-       spin_lock(&mm->page_table_lock);
-       page = mm->context.pgtable_page;
-       ret = NULL;
-       if (page) {
-               void *p = page_address(page);
-
-               mm->context.pgtable_page = NULL;
-
-               ret = (pte_t *) (p + (PAGE_SIZE / 2));
-       }
-       spin_unlock(&mm->page_table_lock);
-
-       return ret;
-}
-
-static struct page *__alloc_for_cache(struct mm_struct *mm)
-{
-       struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
-                                      __GFP_REPEAT | __GFP_ZERO);
-
-       if (page) {
-               spin_lock(&mm->page_table_lock);
-               if (!mm->context.pgtable_page) {
-                       atomic_set(&page->_count, 2);
-                       mm->context.pgtable_page = page;
-               }
-               spin_unlock(&mm->page_table_lock);
-       }
-       return page;
-}
-
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
                            unsigned long address)
 {
-       struct page *page;
-       pte_t *pte;
-
-       pte = get_from_cache(mm);
-       if (pte)
-               return pte;
+       struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+                                      __GFP_REPEAT | __GFP_ZERO);
+       pte_t *pte = NULL;
 
-       page = __alloc_for_cache(mm);
        if (page)
                pte = (pte_t *) page_address(page);
 
@@ -2511,14 +2563,10 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
 pgtable_t pte_alloc_one(struct mm_struct *mm,
                        unsigned long address)
 {
-       struct page *page;
-       pte_t *pte;
-
-       pte = get_from_cache(mm);
-       if (pte)
-               return pte;
+       struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
+                                      __GFP_REPEAT | __GFP_ZERO);
+       pte_t *pte = NULL;
 
-       page = __alloc_for_cache(mm);
        if (!page)
                return NULL;
        if (!pgtable_page_ctor(page)) {
@@ -2530,18 +2578,15 @@ pgtable_t pte_alloc_one(struct mm_struct *mm,
 
 void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-       struct page *page = virt_to_page(pte);
-       if (put_page_testzero(page))
-               free_hot_cold_page(page, 0);
+       free_page((unsigned long)pte);
 }
 
 static void __pte_free(pgtable_t pte)
 {
        struct page *page = virt_to_page(pte);
-       if (put_page_testzero(page)) {
-               pgtable_page_dtor(page);
-               free_hot_cold_page(page, 0);
-       }
+
+       pgtable_page_dtor(page);
+       __free_page(page);
 }
 
 void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -2558,124 +2603,27 @@ void pgtable_free(void *table, bool is_page)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot, bool for_modify)
-{
-       if (pgprot_val(pgprot) & _PAGE_VALID)
-               pmd_val(pmd) |= PMD_HUGE_PRESENT;
-       if (tlb_type == hypervisor) {
-               if (pgprot_val(pgprot) & _PAGE_WRITE_4V)
-                       pmd_val(pmd) |= PMD_HUGE_WRITE;
-               if (pgprot_val(pgprot) & _PAGE_EXEC_4V)
-                       pmd_val(pmd) |= PMD_HUGE_EXEC;
-
-               if (!for_modify) {
-                       if (pgprot_val(pgprot) & _PAGE_ACCESSED_4V)
-                               pmd_val(pmd) |= PMD_HUGE_ACCESSED;
-                       if (pgprot_val(pgprot) & _PAGE_MODIFIED_4V)
-                               pmd_val(pmd) |= PMD_HUGE_DIRTY;
-               }
-       } else {
-               if (pgprot_val(pgprot) & _PAGE_WRITE_4U)
-                       pmd_val(pmd) |= PMD_HUGE_WRITE;
-               if (pgprot_val(pgprot) & _PAGE_EXEC_4U)
-                       pmd_val(pmd) |= PMD_HUGE_EXEC;
-
-               if (!for_modify) {
-                       if (pgprot_val(pgprot) & _PAGE_ACCESSED_4U)
-                               pmd_val(pmd) |= PMD_HUGE_ACCESSED;
-                       if (pgprot_val(pgprot) & _PAGE_MODIFIED_4U)
-                               pmd_val(pmd) |= PMD_HUGE_DIRTY;
-               }
-       }
-
-       return pmd;
-}
-
-pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
-{
-       pmd_t pmd;
-
-       pmd_val(pmd) = (page_nr << ((PAGE_SHIFT - PMD_PADDR_SHIFT)));
-       pmd_val(pmd) |= PMD_ISHUGE;
-       pmd = pmd_set_protbits(pmd, pgprot, false);
-       return pmd;
-}
-
-pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
-       pmd_val(pmd) &= ~(PMD_HUGE_PRESENT |
-                         PMD_HUGE_WRITE |
-                         PMD_HUGE_EXEC);
-       pmd = pmd_set_protbits(pmd, newprot, true);
-       return pmd;
-}
-
-pgprot_t pmd_pgprot(pmd_t entry)
-{
-       unsigned long pte = 0;
-
-       if (pmd_val(entry) & PMD_HUGE_PRESENT)
-               pte |= _PAGE_VALID;
-
-       if (tlb_type == hypervisor) {
-               if (pmd_val(entry) & PMD_HUGE_PRESENT)
-                       pte |= _PAGE_PRESENT_4V;
-               if (pmd_val(entry) & PMD_HUGE_EXEC)
-                       pte |= _PAGE_EXEC_4V;
-               if (pmd_val(entry) & PMD_HUGE_WRITE)
-                       pte |= _PAGE_W_4V;
-               if (pmd_val(entry) & PMD_HUGE_ACCESSED)
-                       pte |= _PAGE_ACCESSED_4V;
-               if (pmd_val(entry) & PMD_HUGE_DIRTY)
-                       pte |= _PAGE_MODIFIED_4V;
-               pte |= _PAGE_CP_4V|_PAGE_CV_4V;
-       } else {
-               if (pmd_val(entry) & PMD_HUGE_PRESENT)
-                       pte |= _PAGE_PRESENT_4U;
-               if (pmd_val(entry) & PMD_HUGE_EXEC)
-                       pte |= _PAGE_EXEC_4U;
-               if (pmd_val(entry) & PMD_HUGE_WRITE)
-                       pte |= _PAGE_W_4U;
-               if (pmd_val(entry) & PMD_HUGE_ACCESSED)
-                       pte |= _PAGE_ACCESSED_4U;
-               if (pmd_val(entry) & PMD_HUGE_DIRTY)
-                       pte |= _PAGE_MODIFIED_4U;
-               pte |= _PAGE_CP_4U|_PAGE_CV_4U;
-       }
-
-       return __pgprot(pte);
-}
-
 void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
                          pmd_t *pmd)
 {
        unsigned long pte, flags;
        struct mm_struct *mm;
        pmd_t entry = *pmd;
-       pgprot_t prot;
 
        if (!pmd_large(entry) || !pmd_young(entry))
                return;
 
-       pte = (pmd_val(entry) & ~PMD_HUGE_PROTBITS);
-       pte <<= PMD_PADDR_SHIFT;
-       pte |= _PAGE_VALID;
-
-       prot = pmd_pgprot(entry);
-
-       if (tlb_type == hypervisor)
-               pgprot_val(prot) |= _PAGE_SZHUGE_4V;
-       else
-               pgprot_val(prot) |= _PAGE_SZHUGE_4U;
+       pte = pmd_val(entry);
 
-       pte |= pgprot_val(prot);
+       /* We are fabricating 8MB pages using 4MB real hw pages.  */
+       pte |= (addr & (1UL << REAL_HPAGE_SHIFT));
 
        mm = vma->vm_mm;
 
        spin_lock_irqsave(&mm->context.lock, flags);
 
        if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL)
-               __update_mmu_tsb_insert(mm, MM_TSB_HUGE, HPAGE_SHIFT,
+               __update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
                                        addr, pte);
 
        spin_unlock_irqrestore(&mm->context.lock, flags);