*/
/*
- * This allocator is designed for use with zram. Thus, the allocator is
- * supposed to work well under low memory conditions. In particular, it
- * never attempts higher order page allocation which is very likely to
- * fail under memory pressure. On the other hand, if we just use single
- * (0-order) pages, it would suffer from very high fragmentation --
- * any object of size PAGE_SIZE/2 or larger would occupy an entire page.
- * This was one of the major issues with its predecessor (xvmalloc).
- *
- * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
- * and links them together using various 'struct page' fields. These linked
- * pages act as a single higher-order page i.e. an object can span 0-order
- * page boundaries. The code refers to these linked pages as a single entity
- * called zspage.
- *
- * For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
- * since this satisfies the requirements of all its current users (in the
- * worst case, page is incompressible and is thus stored "as-is" i.e. in
- * uncompressed form). For allocation requests larger than this size, failure
- * is returned (see zs_malloc).
- *
- * Additionally, zs_malloc() does not return a dereferenceable pointer.
- * Instead, it returns an opaque handle (unsigned long) which encodes actual
- * location of the allocated object. The reason for this indirection is that
- * zsmalloc does not keep zspages permanently mapped since that would cause
- * issues on 32-bit systems where the VA region for kernel space mappings
- * is very small. So, before using the allocating memory, the object has to
- * be mapped using zs_map_object() to get a usable pointer and subsequently
- * unmapped using zs_unmap_object().
- *
* Following is how we use various fields and flags of underlying
* struct page(s) to form a zspage.
*
*
* page->private (union with page->first_page): refers to the
* component page after the first page
+ * If the page is first_page for huge object, it stores handle.
+ * Look at size_class->huge.
* page->freelist: points to the first free object in zspage.
* Free objects are linked together using in-place
* metadata.
#define ZS_MIN_ALLOC_SIZE \
MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
/* each chunk includes extra space to keep handle */
-#define ZS_MAX_ALLOC_SIZE (PAGE_SIZE + ZS_HANDLE_SIZE)
+#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
/*
* On systems with 4K page size, this gives 255 size classes! There is a
enum zs_stat_type {
OBJ_ALLOCATED,
OBJ_USED,
+ CLASS_ALMOST_FULL,
+ CLASS_ALMOST_EMPTY,
NR_ZS_STAT_TYPE,
};
/* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
int pages_per_zspage;
+ /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
+ bool huge;
#ifdef CONFIG_ZSMALLOC_STAT
struct zs_size_stat stats;
#endif
char *vm_addr; /* address of kmap_atomic()'ed pages */
enum zs_mapmode vm_mm; /* mapping mode */
+ bool huge;
};
static int create_handle_cache(struct zs_pool *pool)
MODULE_ALIAS("zpool-zsmalloc");
#endif /* CONFIG_ZPOOL */
+static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
+{
+ return pages_per_zspage * PAGE_SIZE / size;
+}
+
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
ZS_SIZE_CLASS_DELTA);
- return idx;
+ return min(zs_size_classes - 1, idx);
+}
+
+#ifdef CONFIG_ZSMALLOC_STAT
+
+static inline void zs_stat_inc(struct size_class *class,
+ enum zs_stat_type type, unsigned long cnt)
+{
+ class->stats.objs[type] += cnt;
+}
+
+static inline void zs_stat_dec(struct size_class *class,
+ enum zs_stat_type type, unsigned long cnt)
+{
+ class->stats.objs[type] -= cnt;
+}
+
+static inline unsigned long zs_stat_get(struct size_class *class,
+ enum zs_stat_type type)
+{
+ return class->stats.objs[type];
+}
+
+static int __init zs_stat_init(void)
+{
+ if (!debugfs_initialized())
+ return -ENODEV;
+
+ zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
+ if (!zs_stat_root)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void __exit zs_stat_exit(void)
+{
+ debugfs_remove_recursive(zs_stat_root);
+}
+
+static int zs_stats_size_show(struct seq_file *s, void *v)
+{
+ int i;
+ struct zs_pool *pool = s->private;
+ struct size_class *class;
+ int objs_per_zspage;
+ unsigned long class_almost_full, class_almost_empty;
+ unsigned long obj_allocated, obj_used, pages_used;
+ unsigned long total_class_almost_full = 0, total_class_almost_empty = 0;
+ unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
+
+ seq_printf(s, " %5s %5s %11s %12s %13s %10s %10s %16s\n",
+ "class", "size", "almost_full", "almost_empty",
+ "obj_allocated", "obj_used", "pages_used",
+ "pages_per_zspage");
+
+ for (i = 0; i < zs_size_classes; i++) {
+ class = pool->size_class[i];
+
+ if (class->index != i)
+ continue;
+
+ spin_lock(&class->lock);
+ class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL);
+ class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY);
+ obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
+ obj_used = zs_stat_get(class, OBJ_USED);
+ spin_unlock(&class->lock);
+
+ objs_per_zspage = get_maxobj_per_zspage(class->size,
+ class->pages_per_zspage);
+ pages_used = obj_allocated / objs_per_zspage *
+ class->pages_per_zspage;
+
+ seq_printf(s, " %5u %5u %11lu %12lu %13lu %10lu %10lu %16d\n",
+ i, class->size, class_almost_full, class_almost_empty,
+ obj_allocated, obj_used, pages_used,
+ class->pages_per_zspage);
+
+ total_class_almost_full += class_almost_full;
+ total_class_almost_empty += class_almost_empty;
+ total_objs += obj_allocated;
+ total_used_objs += obj_used;
+ total_pages += pages_used;
+ }
+
+ seq_puts(s, "\n");
+ seq_printf(s, " %5s %5s %11lu %12lu %13lu %10lu %10lu\n",
+ "Total", "", total_class_almost_full,
+ total_class_almost_empty, total_objs,
+ total_used_objs, total_pages);
+
+ return 0;
}
+static int zs_stats_size_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, zs_stats_size_show, inode->i_private);
+}
+
+static const struct file_operations zs_stat_size_ops = {
+ .open = zs_stats_size_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int zs_pool_stat_create(char *name, struct zs_pool *pool)
+{
+ struct dentry *entry;
+
+ if (!zs_stat_root)
+ return -ENODEV;
+
+ entry = debugfs_create_dir(name, zs_stat_root);
+ if (!entry) {
+ pr_warn("debugfs dir <%s> creation failed\n", name);
+ return -ENOMEM;
+ }
+ pool->stat_dentry = entry;
+
+ entry = debugfs_create_file("classes", S_IFREG | S_IRUGO,
+ pool->stat_dentry, pool, &zs_stat_size_ops);
+ if (!entry) {
+ pr_warn("%s: debugfs file entry <%s> creation failed\n",
+ name, "classes");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void zs_pool_stat_destroy(struct zs_pool *pool)
+{
+ debugfs_remove_recursive(pool->stat_dentry);
+}
+
+#else /* CONFIG_ZSMALLOC_STAT */
+
+static inline void zs_stat_inc(struct size_class *class,
+ enum zs_stat_type type, unsigned long cnt)
+{
+}
+
+static inline void zs_stat_dec(struct size_class *class,
+ enum zs_stat_type type, unsigned long cnt)
+{
+}
+
+static inline unsigned long zs_stat_get(struct size_class *class,
+ enum zs_stat_type type)
+{
+ return 0;
+}
+
+static int __init zs_stat_init(void)
+{
+ return 0;
+}
+
+static void __exit zs_stat_exit(void)
+{
+}
+
+static inline int zs_pool_stat_create(char *name, struct zs_pool *pool)
+{
+ return 0;
+}
+
+static inline void zs_pool_stat_destroy(struct zs_pool *pool)
+{
+}
+
+#endif
+
+
/*
* For each size class, zspages are divided into different groups
* depending on how "full" they are. This was done so that we could
list_add_tail(&page->lru, &(*head)->lru);
*head = page;
+ zs_stat_inc(class, fullness == ZS_ALMOST_EMPTY ?
+ CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
}
/*
struct page, lru);
list_del_init(&page->lru);
+ zs_stat_dec(class, fullness == ZS_ALMOST_EMPTY ?
+ CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
}
/*
* to form a zspage for each size class. This is important
* to reduce wastage due to unusable space left at end of
* each zspage which is given as:
- * wastage = Zp - Zp % size_class
+ * wastage = Zp % class_size
+ * usage = Zp - wastage
* where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
*
* For example, for size class of 3/8 * PAGE_SIZE, we should
return *(unsigned long *)handle;
}
-unsigned long obj_to_head(void *obj)
+static unsigned long obj_to_head(struct size_class *class, struct page *page,
+ void *obj)
{
- return *(unsigned long *)obj;
+ if (class->huge) {
+ VM_BUG_ON(!is_first_page(page));
+ return *(unsigned long *)page_private(page);
+ } else
+ return *(unsigned long *)obj;
}
static unsigned long obj_idx_to_offset(struct page *page,
if (area->vm_mm == ZS_MM_RO)
goto out;
- buf = area->vm_buf + ZS_HANDLE_SIZE;
- size -= ZS_HANDLE_SIZE;
- off += ZS_HANDLE_SIZE;
+ buf = area->vm_buf;
+ if (!area->huge) {
+ buf = buf + ZS_HANDLE_SIZE;
+ size -= ZS_HANDLE_SIZE;
+ off += ZS_HANDLE_SIZE;
+ }
sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];
zs_size_classes = nr;
}
-static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
-{
- return pages_per_zspage * PAGE_SIZE / size;
-}
-
static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
{
if (prev->pages_per_zspage != pages_per_zspage)
return page->inuse == page->objects;
}
-#ifdef CONFIG_ZSMALLOC_STAT
-
-static inline void zs_stat_inc(struct size_class *class,
- enum zs_stat_type type, unsigned long cnt)
-{
- class->stats.objs[type] += cnt;
-}
-
-static inline void zs_stat_dec(struct size_class *class,
- enum zs_stat_type type, unsigned long cnt)
-{
- class->stats.objs[type] -= cnt;
-}
-
-static inline unsigned long zs_stat_get(struct size_class *class,
- enum zs_stat_type type)
-{
- return class->stats.objs[type];
-}
-
-static int __init zs_stat_init(void)
-{
- if (!debugfs_initialized())
- return -ENODEV;
-
- zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
- if (!zs_stat_root)
- return -ENOMEM;
-
- return 0;
-}
-
-static void __exit zs_stat_exit(void)
-{
- debugfs_remove_recursive(zs_stat_root);
-}
-
-static int zs_stats_size_show(struct seq_file *s, void *v)
-{
- int i;
- struct zs_pool *pool = s->private;
- struct size_class *class;
- int objs_per_zspage;
- unsigned long obj_allocated, obj_used, pages_used;
- unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
-
- seq_printf(s, " %5s %5s %13s %10s %10s\n", "class", "size",
- "obj_allocated", "obj_used", "pages_used");
-
- for (i = 0; i < zs_size_classes; i++) {
- class = pool->size_class[i];
-
- if (class->index != i)
- continue;
-
- spin_lock(&class->lock);
- obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
- obj_used = zs_stat_get(class, OBJ_USED);
- spin_unlock(&class->lock);
-
- objs_per_zspage = get_maxobj_per_zspage(class->size,
- class->pages_per_zspage);
- pages_used = obj_allocated / objs_per_zspage *
- class->pages_per_zspage;
-
- seq_printf(s, " %5u %5u %10lu %10lu %10lu\n", i,
- class->size, obj_allocated, obj_used, pages_used);
-
- total_objs += obj_allocated;
- total_used_objs += obj_used;
- total_pages += pages_used;
- }
-
- seq_puts(s, "\n");
- seq_printf(s, " %5s %5s %10lu %10lu %10lu\n", "Total", "",
- total_objs, total_used_objs, total_pages);
-
- return 0;
-}
-
-static int zs_stats_size_open(struct inode *inode, struct file *file)
-{
- return single_open(file, zs_stats_size_show, inode->i_private);
-}
-
-static const struct file_operations zs_stat_size_ops = {
- .open = zs_stats_size_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int zs_pool_stat_create(char *name, struct zs_pool *pool)
-{
- struct dentry *entry;
-
- if (!zs_stat_root)
- return -ENODEV;
-
- entry = debugfs_create_dir(name, zs_stat_root);
- if (!entry) {
- pr_warn("debugfs dir <%s> creation failed\n", name);
- return -ENOMEM;
- }
- pool->stat_dentry = entry;
-
- entry = debugfs_create_file("obj_in_classes", S_IFREG | S_IRUGO,
- pool->stat_dentry, pool, &zs_stat_size_ops);
- if (!entry) {
- pr_warn("%s: debugfs file entry <%s> creation failed\n",
- name, "obj_in_classes");
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static void zs_pool_stat_destroy(struct zs_pool *pool)
-{
- debugfs_remove_recursive(pool->stat_dentry);
-}
-
-#else /* CONFIG_ZSMALLOC_STAT */
-
-static inline void zs_stat_inc(struct size_class *class,
- enum zs_stat_type type, unsigned long cnt)
-{
-}
-
-static inline void zs_stat_dec(struct size_class *class,
- enum zs_stat_type type, unsigned long cnt)
-{
-}
-
-static inline unsigned long zs_stat_get(struct size_class *class,
- enum zs_stat_type type)
-{
- return 0;
-}
-
-static int __init zs_stat_init(void)
-{
- return 0;
-}
-
-static void __exit zs_stat_exit(void)
-{
-}
-
-static inline int zs_pool_stat_create(char *name, struct zs_pool *pool)
-{
- return 0;
-}
-
-static inline void zs_pool_stat_destroy(struct zs_pool *pool)
-{
-}
-
-#endif
-
unsigned long zs_get_total_pages(struct zs_pool *pool)
{
return atomic_long_read(&pool->pages_allocated);
ret = __zs_map_object(area, pages, off, class->size);
out:
- return ret + ZS_HANDLE_SIZE;
+ if (!class->huge)
+ ret += ZS_HANDLE_SIZE;
+
+ return ret;
}
EXPORT_SYMBOL_GPL(zs_map_object);
vaddr = kmap_atomic(m_page);
link = (struct link_free *)vaddr + m_offset / sizeof(*link);
first_page->freelist = link->next;
- /* record handle in the header of allocated chunk */
- link->handle = handle;
+ if (!class->huge)
+ /* record handle in the header of allocated chunk */
+ link->handle = handle;
+ else
+ /* record handle in first_page->private */
+ set_page_private(first_page, handle);
kunmap_atomic(vaddr);
first_page->inuse++;
zs_stat_inc(class, OBJ_USED, 1);
struct size_class *class;
struct page *first_page;
- if (unlikely(!size || (size + ZS_HANDLE_SIZE) > ZS_MAX_ALLOC_SIZE))
+ if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
return 0;
handle = alloc_handle(pool);
/* Insert this object in containing zspage's freelist */
link = (struct link_free *)(vaddr + f_offset);
link->next = first_page->freelist;
+ if (class->huge)
+ set_page_private(first_page, 0);
kunmap_atomic(vaddr);
first_page->freelist = (void *)obj;
first_page->inuse--;
if (written == class->size)
break;
- if (s_off + size >= PAGE_SIZE) {
+ s_off += size;
+ s_size -= size;
+ d_off += size;
+ d_size -= size;
+
+ if (s_off >= PAGE_SIZE) {
kunmap_atomic(d_addr);
kunmap_atomic(s_addr);
s_page = get_next_page(s_page);
d_addr = kmap_atomic(d_page);
s_size = class->size - written;
s_off = 0;
- } else {
- s_off += size;
- s_size -= size;
}
- if (d_off + size >= PAGE_SIZE) {
+ if (d_off >= PAGE_SIZE) {
kunmap_atomic(d_addr);
d_page = get_next_page(d_page);
BUG_ON(!d_page);
d_addr = kmap_atomic(d_page);
d_size = class->size - written;
d_off = 0;
- } else {
- d_off += size;
- d_size -= size;
}
}
offset += class->size * index;
while (offset < PAGE_SIZE) {
- head = obj_to_head(addr + offset);
+ head = obj_to_head(class, page, addr + offset);
if (head & OBJ_ALLOCATED_TAG) {
handle = head & ~OBJ_ALLOCATED_TAG;
if (trypin_tag(handle))
static void putback_zspage(struct zs_pool *pool, struct size_class *class,
struct page *first_page)
{
- int class_idx;
enum fullness_group fullness;
BUG_ON(!is_first_page(first_page));
- get_zspage_mapping(first_page, &class_idx, &fullness);
+ fullness = get_fullness_group(first_page);
insert_zspage(first_page, class, fullness);
- fullness = fix_fullness_group(class, first_page);
+ set_zspage_mapping(first_page, class->index, fullness);
+
if (fullness == ZS_EMPTY) {
zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
class->size, class->pages_per_zspage));
struct page *dst_page = NULL;
unsigned long nr_total_migrated = 0;
- cond_resched();
-
spin_lock(&class->lock);
while ((src_page = isolate_source_page(class))) {
nr_migrated += __zs_compact(pool, class);
}
- synchronize_rcu();
-
return nr_migrated;
}
EXPORT_SYMBOL_GPL(zs_compact);
class->size = size;
class->index = i;
class->pages_per_zspage = pages_per_zspage;
+ if (pages_per_zspage == 1 &&
+ get_maxobj_per_zspage(size, pages_per_zspage) == 1)
+ class->huge = true;
spin_lock_init(&class->lock);
pool->size_class[i] = class;