X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=mm%2Fslab.c;h=3dbd6f4e74774be9fd1db7e19d581ae31b1f4cce;hb=9a69d1aeccf169d9a1e442c07d3a6e87f06a7b49;hp=2b37a62f631431da4bd95991291d8d94b68806a9;hpb=056c62418cc639bf2fe962c6a6ee56054b838bc7;p=cascardo%2Flinux.git diff --git a/mm/slab.c b/mm/slab.c index 2b37a62f6314..3dbd6f4e7477 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -735,14 +735,6 @@ static inline void init_lock_keys(void) static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; -/* - * vm_enough_memory() looks at this to determine how many slab-allocated pages - * are possibly freeable under pressure - * - * SLAB_RECLAIM_ACCOUNT turns this on per-slab - */ -atomic_t slab_reclaim_pages; - /* * chicken and egg problem: delay the per-cpu array allocation * until the general caches are up. @@ -980,7 +972,39 @@ static int transfer_objects(struct array_cache *to, return nr; } -#ifdef CONFIG_NUMA +#ifndef CONFIG_NUMA + +#define drain_alien_cache(cachep, alien) do { } while (0) +#define reap_alien(cachep, l3) do { } while (0) + +static inline struct array_cache **alloc_alien_cache(int node, int limit) +{ + return (struct array_cache **)BAD_ALIEN_MAGIC; +} + +static inline void free_alien_cache(struct array_cache **ac_ptr) +{ +} + +static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) +{ + return 0; +} + +static inline void *alternate_node_alloc(struct kmem_cache *cachep, + gfp_t flags) +{ + return NULL; +} + +static inline void *__cache_alloc_node(struct kmem_cache *cachep, + gfp_t flags, int nodeid) +{ + return NULL; +} + +#else /* CONFIG_NUMA */ + static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); @@ -1109,26 +1133,6 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) } return 1; } - -#else - -#define drain_alien_cache(cachep, alien) do { } while (0) -#define reap_alien(cachep, l3) do { } while (0) - -static inline struct array_cache **alloc_alien_cache(int node, int limit) -{ - return (struct array_cache **)BAD_ALIEN_MAGIC; -} - -static inline void free_alien_cache(struct array_cache **ac_ptr) -{ -} - -static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) -{ - return 0; -} - #endif static int __cpuinit cpuup_callback(struct notifier_block *nfb, @@ -1572,7 +1576,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) */ flags |= __GFP_COMP; #endif - flags |= cachep->gfpflags; + + /* + * Under NUMA we want memory on the indicated node. We will handle + * the needed fallback ourselves since we want to serve from our + * per node object lists first for other nodes. + */ + flags |= cachep->gfpflags | GFP_THISNODE; page = alloc_pages_node(nodeid, flags, cachep->gfporder); if (!page) @@ -1580,8 +1590,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) nr_pages = (1 << cachep->gfporder); if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_add(nr_pages, &slab_reclaim_pages); - add_zone_page_state(page_zone(page), NR_SLAB, nr_pages); + add_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_pages); + else + add_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_pages); for (i = 0; i < nr_pages; i++) __SetPageSlab(page + i); return page_address(page); @@ -1596,7 +1609,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) struct page *page = virt_to_page(addr); const unsigned long nr_freed = i; - sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed); + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + sub_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_freed); + else + sub_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_freed); while (i--) { BUG_ON(!PageSlab(page)); __ClearPageSlab(page); @@ -1605,8 +1623,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) if (current->reclaim_state) current->reclaim_state->reclaimed_slab += nr_freed; free_pages((unsigned long)addr, cachep->gfporder); - if (cachep->flags & SLAB_RECLAIM_ACCOUNT) - atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages); } static void kmem_rcu_free(struct rcu_head *head) @@ -1667,10 +1683,32 @@ static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) static void dump_line(char *data, int offset, int limit) { int i; + unsigned char error = 0; + int bad_count = 0; + printk(KERN_ERR "%03x:", offset); - for (i = 0; i < limit; i++) + for (i = 0; i < limit; i++) { + if (data[offset + i] != POISON_FREE) { + error = data[offset + i]; + bad_count++; + } printk(" %02x", (unsigned char)data[offset + i]); + } printk("\n"); + + if (bad_count == 1) { + error ^= POISON_FREE; + if (!(error & (error - 1))) { + printk(KERN_ERR "Single bit error detected. Probably " + "bad RAM.\n"); +#ifdef CONFIG_X86 + printk(KERN_ERR "Run memtest86+ or a similar memory " + "test tool.\n"); +#else + printk(KERN_ERR "Run a memory test tool.\n"); +#endif + } + } } #endif @@ -2444,7 +2482,6 @@ EXPORT_SYMBOL(kmem_cache_shrink); * @cachep: the cache to destroy * * Remove a struct kmem_cache object from the slab cache. - * Returns 0 on success. * * It is expected this function will be called by a module when it is * unloaded. This will remove the cache completely, and avoid a duplicate @@ -2456,7 +2493,7 @@ EXPORT_SYMBOL(kmem_cache_shrink); * The caller must guarantee that noone will allocate memory from the cache * during the kmem_cache_destroy(). */ -int kmem_cache_destroy(struct kmem_cache *cachep) +void kmem_cache_destroy(struct kmem_cache *cachep) { BUG_ON(!cachep || in_interrupt()); @@ -2477,7 +2514,7 @@ int kmem_cache_destroy(struct kmem_cache *cachep) list_add(&cachep->next, &cache_chain); mutex_unlock(&cache_chain_mutex); unlock_cpu_hotplug(); - return 1; + return; } if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) @@ -2485,7 +2522,6 @@ int kmem_cache_destroy(struct kmem_cache *cachep) __kmem_cache_destroy(cachep); unlock_cpu_hotplug(); - return 0; } EXPORT_SYMBOL(kmem_cache_destroy); @@ -3032,14 +3068,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) void *objp; struct array_cache *ac; -#ifdef CONFIG_NUMA - if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { - objp = alternate_node_alloc(cachep, flags); - if (objp != NULL) - return objp; - } -#endif - check_irq_off(); ac = cpu_cache_get(cachep); if (likely(ac->avail)) { @@ -3057,12 +3085,24 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) { unsigned long save_flags; - void *objp; + void *objp = NULL; cache_alloc_debugcheck_before(cachep, flags); local_irq_save(save_flags); - objp = ____cache_alloc(cachep, flags); + + if (unlikely(NUMA_BUILD && + current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) + objp = alternate_node_alloc(cachep, flags); + + if (!objp) + objp = ____cache_alloc(cachep, flags); + /* + * We may just have run out of memory on the local node. + * __cache_alloc_node() knows how to locate memory on other nodes + */ + if (NUMA_BUILD && !objp) + objp = __cache_alloc_node(cachep, flags, numa_node_id()); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); @@ -3081,7 +3121,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) { int nid_alloc, nid_here; - if (in_interrupt()) + if (in_interrupt() || (flags & __GFP_THISNODE)) return NULL; nid_alloc = nid_here = numa_node_id(); if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) @@ -3093,6 +3133,28 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) return NULL; } +/* + * Fallback function if there was no memory available and no objects on a + * certain node and we are allowed to fall back. We mimick the behavior of + * the page allocator. We fall back according to a zonelist determined by + * the policy layer while obeying cpuset constraints. + */ +void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) +{ + struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy)) + ->node_zonelists[gfp_zone(flags)]; + struct zone **z; + void *obj = NULL; + + for (z = zonelist->zones; *z && !obj; z++) + if (zone_idx(*z) <= ZONE_NORMAL && + cpuset_zone_allowed(*z, flags)) + obj = __cache_alloc_node(cache, + flags | __GFP_THISNODE, + zone_to_nid(*z)); + return obj; +} + /* * A interface to enable slab creation on nodeid */ @@ -3146,11 +3208,15 @@ retry: must_grow: spin_unlock(&l3->list_lock); x = cache_grow(cachep, flags, nodeid); + if (x) + goto retry; - if (!x) - return NULL; + if (!(flags & __GFP_THISNODE)) + /* Unable to grow the cache. Fall back to other nodes. */ + return fallback_alloc(cachep, flags); + + return NULL; - goto retry; done: return obj; } @@ -3440,130 +3506,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) EXPORT_SYMBOL(__kmalloc_track_caller); #endif -#ifdef CONFIG_SMP -/** - * percpu_depopulate - depopulate per-cpu data for given cpu - * @__pdata: per-cpu data to depopulate - * @cpu: depopulate per-cpu data for this cpu - * - * Depopulating per-cpu data for a cpu going offline would be a typical - * use case. You need to register a cpu hotplug handler for that purpose. - */ -void percpu_depopulate(void *__pdata, int cpu) -{ - struct percpu_data *pdata = __percpu_disguise(__pdata); - if (pdata->ptrs[cpu]) { - kfree(pdata->ptrs[cpu]); - pdata->ptrs[cpu] = NULL; - } -} -EXPORT_SYMBOL_GPL(percpu_depopulate); - -/** - * percpu_depopulate_mask - depopulate per-cpu data for some cpu's - * @__pdata: per-cpu data to depopulate - * @mask: depopulate per-cpu data for cpu's selected through mask bits - */ -void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) -{ - int cpu; - for_each_cpu_mask(cpu, *mask) - percpu_depopulate(__pdata, cpu); -} -EXPORT_SYMBOL_GPL(__percpu_depopulate_mask); - -/** - * percpu_populate - populate per-cpu data for given cpu - * @__pdata: per-cpu data to populate further - * @size: size of per-cpu object - * @gfp: may sleep or not etc. - * @cpu: populate per-data for this cpu - * - * Populating per-cpu data for a cpu coming online would be a typical - * use case. You need to register a cpu hotplug handler for that purpose. - * Per-cpu object is populated with zeroed buffer. - */ -void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu) -{ - struct percpu_data *pdata = __percpu_disguise(__pdata); - int node = cpu_to_node(cpu); - - BUG_ON(pdata->ptrs[cpu]); - if (node_online(node)) { - /* FIXME: kzalloc_node(size, gfp, node) */ - pdata->ptrs[cpu] = kmalloc_node(size, gfp, node); - if (pdata->ptrs[cpu]) - memset(pdata->ptrs[cpu], 0, size); - } else - pdata->ptrs[cpu] = kzalloc(size, gfp); - return pdata->ptrs[cpu]; -} -EXPORT_SYMBOL_GPL(percpu_populate); - -/** - * percpu_populate_mask - populate per-cpu data for more cpu's - * @__pdata: per-cpu data to populate further - * @size: size of per-cpu object - * @gfp: may sleep or not etc. - * @mask: populate per-cpu data for cpu's selected through mask bits - * - * Per-cpu objects are populated with zeroed buffers. - */ -int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, - cpumask_t *mask) -{ - cpumask_t populated = CPU_MASK_NONE; - int cpu; - - for_each_cpu_mask(cpu, *mask) - if (unlikely(!percpu_populate(__pdata, size, gfp, cpu))) { - __percpu_depopulate_mask(__pdata, &populated); - return -ENOMEM; - } else - cpu_set(cpu, populated); - return 0; -} -EXPORT_SYMBOL_GPL(__percpu_populate_mask); - -/** - * percpu_alloc_mask - initial setup of per-cpu data - * @size: size of per-cpu object - * @gfp: may sleep or not etc. - * @mask: populate per-data for cpu's selected through mask bits - * - * Populating per-cpu data for all online cpu's would be a typical use case, - * which is simplified by the percpu_alloc() wrapper. - * Per-cpu objects are populated with zeroed buffers. - */ -void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) -{ - void *pdata = kzalloc(sizeof(struct percpu_data), gfp); - void *__pdata = __percpu_disguise(pdata); - - if (unlikely(!pdata)) - return NULL; - if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask))) - return __pdata; - kfree(pdata); - return NULL; -} -EXPORT_SYMBOL_GPL(__percpu_alloc_mask); - -/** - * percpu_free - final cleanup of per-cpu data - * @__pdata: object to clean up - * - * We simply clean up any per-cpu object left. No need for the client to - * track and specify through a bis mask which per-cpu objects are to free. - */ -void percpu_free(void *__pdata) -{ - __percpu_depopulate_mask(__pdata, &cpu_possible_map); - kfree(__percpu_disguise(__pdata)); -} -EXPORT_SYMBOL_GPL(percpu_free); -#endif /* CONFIG_SMP */ - /** * kmem_cache_free - Deallocate an object * @cachep: The cache the allocation was from. @@ -3725,22 +3667,26 @@ static void do_ccupdate_local(void *info) static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, int shared) { - struct ccupdate_struct new; + struct ccupdate_struct *new; int i; - memset(&new.new, 0, sizeof(new.new)); + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + for_each_online_cpu(i) { - new.new[i] = alloc_arraycache(cpu_to_node(i), limit, + new->new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount); - if (!new.new[i]) { + if (!new->new[i]) { for (i--; i >= 0; i--) - kfree(new.new[i]); + kfree(new->new[i]); + kfree(new); return -ENOMEM; } } - new.cachep = cachep; + new->cachep = cachep; - on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); + on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); check_irq_on(); cachep->batchcount = batchcount; @@ -3748,7 +3694,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, cachep->shared = shared; for_each_online_cpu(i) { - struct array_cache *ccold = new.new[i]; + struct array_cache *ccold = new->new[i]; if (!ccold) continue; spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); @@ -3756,7 +3702,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); kfree(ccold); } - + kfree(new); return alloc_kmemlist(cachep); } @@ -4274,6 +4220,7 @@ static int leaks_show(struct seq_file *m, void *p) show_symbol(m, n[2*i+2]); seq_putc(m, '\n'); } + return 0; }