drivers/block/zram/zram_drv.c
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/genhd.h>
26 #include <linux/highmem.h>
27 #include <linux/slab.h>
28 #include <linux/string.h>
29 #include <linux/vmalloc.h>
30 #include <linux/err.h>
31 #include <linux/idr.h>
32
33 #include "zram_drv.h"
34
35 static DEFINE_IDR(zram_index_idr);
36 static int zram_major;
37 static const char *default_compressor = "lzo";
38
39 /* Module params (documentation at end) */
40 static unsigned int num_devices = 1;
41
42 static inline void deprecated_attr_warn(const char *name)
43 {
44         pr_warn_once("%d (%s) Attribute %s (and others) will be removed. %s\n",
45                         task_pid_nr(current),
46                         current->comm,
47                         name,
48                         "See zram documentation.");
49 }
50
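   /*
    * Generate a read-only device attribute that prints the named 64-bit
    * stat counter and emits a one-time deprecation warning when read.
    */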
51 #define ZRAM_ATTR_RO(name)                                              \
52 static ssize_t name##_show(struct device *d,                            \
53                                 struct device_attribute *attr, char *b) \
54 {                                                                       \
55         struct zram *zram = dev_to_zram(d);                             \
56                                                                         \
57         deprecated_attr_warn(__stringify(name));                        \
58         return scnprintf(b, PAGE_SIZE, "%llu\n",                        \
59                 (u64)atomic64_read(&zram->stats.name));                 \
60 }                                                                       \
61 static DEVICE_ATTR_RO(name);
62
63 static inline bool init_done(struct zram *zram)
64 {
65         return zram->disksize;
66 }
67
68 static inline struct zram *dev_to_zram(struct device *dev)
69 {
70         return (struct zram *)dev_to_disk(dev)->private_data;
71 }
72
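   /*
    * Handler for the "compact" sysfs attribute: trigger zsmalloc pool
    * compaction and account the number of migrated objects.
    */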
73 static ssize_t compact_store(struct device *dev,
74                 struct device_attribute *attr, const char *buf, size_t len)
75 {
76         unsigned long nr_migrated;
77         struct zram *zram = dev_to_zram(dev);
78         struct zram_meta *meta;
79
80         down_read(&zram->init_lock);
81         if (!init_done(zram)) {
82                 up_read(&zram->init_lock);
83                 return -EINVAL;
84         }
85
86         meta = zram->meta;
87         nr_migrated = zs_compact(meta->mem_pool);
88         atomic64_add(nr_migrated, &zram->stats.num_migrated);
89         up_read(&zram->init_lock);
90
91         return len;
92 }
93
94 static ssize_t disksize_show(struct device *dev,
95                 struct device_attribute *attr, char *buf)
96 {
97         struct zram *zram = dev_to_zram(dev);
98
99         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
100 }
101
102 static ssize_t initstate_show(struct device *dev,
103                 struct device_attribute *attr, char *buf)
104 {
105         u32 val;
106         struct zram *zram = dev_to_zram(dev);
107
108         down_read(&zram->init_lock);
109         val = init_done(zram);
110         up_read(&zram->init_lock);
111
112         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
113 }
114
115 static ssize_t orig_data_size_show(struct device *dev,
116                 struct device_attribute *attr, char *buf)
117 {
118         struct zram *zram = dev_to_zram(dev);
119
120         deprecated_attr_warn("orig_data_size");
121         return scnprintf(buf, PAGE_SIZE, "%llu\n",
122                 (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
123 }
124
125 static ssize_t mem_used_total_show(struct device *dev,
126                 struct device_attribute *attr, char *buf)
127 {
128         u64 val = 0;
129         struct zram *zram = dev_to_zram(dev);
130
131         deprecated_attr_warn("mem_used_total");
132         down_read(&zram->init_lock);
133         if (init_done(zram)) {
134                 struct zram_meta *meta = zram->meta;
135                 val = zs_get_total_pages(meta->mem_pool);
136         }
137         up_read(&zram->init_lock);
138
139         return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
140 }
141
142 static ssize_t max_comp_streams_show(struct device *dev,
143                 struct device_attribute *attr, char *buf)
144 {
145         int val;
146         struct zram *zram = dev_to_zram(dev);
147
148         down_read(&zram->init_lock);
149         val = zram->max_comp_streams;
150         up_read(&zram->init_lock);
151
152         return scnprintf(buf, PAGE_SIZE, "%d\n", val);
153 }
154
155 static ssize_t mem_limit_show(struct device *dev,
156                 struct device_attribute *attr, char *buf)
157 {
158         u64 val;
159         struct zram *zram = dev_to_zram(dev);
160
161         deprecated_attr_warn("mem_limit");
162         down_read(&zram->init_lock);
163         val = zram->limit_pages;
164         up_read(&zram->init_lock);
165
166         return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
167 }
168
169 static ssize_t mem_limit_store(struct device *dev,
170                 struct device_attribute *attr, const char *buf, size_t len)
171 {
172         u64 limit;
173         char *tmp;
174         struct zram *zram = dev_to_zram(dev);
175
176         limit = memparse(buf, &tmp);
177         if (buf == tmp) /* no chars parsed, invalid input */
178                 return -EINVAL;
179
180         down_write(&zram->init_lock);
181         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
182         up_write(&zram->init_lock);
183
184         return len;
185 }
186
187 static ssize_t mem_used_max_show(struct device *dev,
188                 struct device_attribute *attr, char *buf)
189 {
190         u64 val = 0;
191         struct zram *zram = dev_to_zram(dev);
192
193         deprecated_attr_warn("mem_used_max");
194         down_read(&zram->init_lock);
195         if (init_done(zram))
196                 val = atomic_long_read(&zram->stats.max_used_pages);
197         up_read(&zram->init_lock);
198
199         return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
200 }
201
202 static ssize_t mem_used_max_store(struct device *dev,
203                 struct device_attribute *attr, const char *buf, size_t len)
204 {
205         int err;
206         unsigned long val;
207         struct zram *zram = dev_to_zram(dev);
208
209         err = kstrtoul(buf, 10, &val);
210         if (err || val != 0)
211                 return -EINVAL;
212
213         down_read(&zram->init_lock);
214         if (init_done(zram)) {
215                 struct zram_meta *meta = zram->meta;
216                 atomic_long_set(&zram->stats.max_used_pages,
217                                 zs_get_total_pages(meta->mem_pool));
218         }
219         up_read(&zram->init_lock);
220
221         return len;
222 }
223
224 static ssize_t max_comp_streams_store(struct device *dev,
225                 struct device_attribute *attr, const char *buf, size_t len)
226 {
227         int num;
228         struct zram *zram = dev_to_zram(dev);
229         int ret;
230
231         ret = kstrtoint(buf, 0, &num);
232         if (ret < 0)
233                 return ret;
234         if (num < 1)
235                 return -EINVAL;
236
237         down_write(&zram->init_lock);
238         if (init_done(zram)) {
239                 if (!zcomp_set_max_streams(zram->comp, num)) {
240                         pr_info("Cannot change max compression streams\n");
241                         ret = -EINVAL;
242                         goto out;
243                 }
244         }
245
246         zram->max_comp_streams = num;
247         ret = len;
248 out:
249         up_write(&zram->init_lock);
250         return ret;
251 }
252
253 static ssize_t comp_algorithm_show(struct device *dev,
254                 struct device_attribute *attr, char *buf)
255 {
256         size_t sz;
257         struct zram *zram = dev_to_zram(dev);
258
259         down_read(&zram->init_lock);
260         sz = zcomp_available_show(zram->compressor, buf);
261         up_read(&zram->init_lock);
262
263         return sz;
264 }
265
266 static ssize_t comp_algorithm_store(struct device *dev,
267                 struct device_attribute *attr, const char *buf, size_t len)
268 {
269         struct zram *zram = dev_to_zram(dev);
270         down_write(&zram->init_lock);
271         if (init_done(zram)) {
272                 up_write(&zram->init_lock);
273                 pr_info("Can't change algorithm for initialized device\n");
274                 return -EBUSY;
275         }
276         strlcpy(zram->compressor, buf, sizeof(zram->compressor));
277         up_write(&zram->init_lock);
278         return len;
279 }
280
281 /* flag operations need the table entry's ZRAM_ACCESS bit_spinlock held */
282 static int zram_test_flag(struct zram_meta *meta, u32 index,
283                         enum zram_pageflags flag)
284 {
285         return meta->table[index].value & BIT(flag);
286 }
287
288 static void zram_set_flag(struct zram_meta *meta, u32 index,
289                         enum zram_pageflags flag)
290 {
291         meta->table[index].value |= BIT(flag);
292 }
293
294 static void zram_clear_flag(struct zram_meta *meta, u32 index,
295                         enum zram_pageflags flag)
296 {
297         meta->table[index].value &= ~BIT(flag);
298 }
299
300 static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
301 {
302         return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
303 }
304
305 static void zram_set_obj_size(struct zram_meta *meta,
306                                         u32 index, size_t size)
307 {
308         unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;
309
310         meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
311 }
312
313 static inline int is_partial_io(struct bio_vec *bvec)
314 {
315         return bvec->bv_len != PAGE_SIZE;
316 }
317
318 /*
319  * Check if request is within bounds and aligned on zram logical blocks.
320  */
321 static inline int valid_io_request(struct zram *zram,
322                 sector_t start, unsigned int size)
323 {
324         u64 end, bound;
325
326         /* unaligned request */
327         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
328                 return 0;
329         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
330                 return 0;
331
332         end = start + (size >> SECTOR_SHIFT);
333         bound = zram->disksize >> SECTOR_SHIFT;
334         /* out of range */
335         if (unlikely(start >= bound || end > bound || start > end))
336                 return 0;
337
338         /* I/O request is valid */
339         return 1;
340 }
341
342 static void zram_meta_free(struct zram_meta *meta, u64 disksize)
343 {
344         size_t num_pages = disksize >> PAGE_SHIFT;
345         size_t index;
346
347         /* Free all pages that are still in this zram device */
348         for (index = 0; index < num_pages; index++) {
349                 unsigned long handle = meta->table[index].handle;
350
351                 if (!handle)
352                         continue;
353
354                 zs_free(meta->mem_pool, handle);
355         }
356
357         zs_destroy_pool(meta->mem_pool);
358         vfree(meta->table);
359         kfree(meta);
360 }
361
362 static struct zram_meta *zram_meta_alloc(int device_id, u64 disksize)
363 {
364         size_t num_pages;
365         char pool_name[8];
366         struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
367
368         if (!meta)
369                 return NULL;
370
371         num_pages = disksize >> PAGE_SHIFT;
372         meta->table = vzalloc(num_pages * sizeof(*meta->table));
373         if (!meta->table) {
374                 pr_err("Error allocating zram address table\n");
375                 goto out_error;
376         }
377
378         snprintf(pool_name, sizeof(pool_name), "zram%d", device_id);
379         meta->mem_pool = zs_create_pool(pool_name, GFP_NOIO | __GFP_HIGHMEM);
380         if (!meta->mem_pool) {
381                 pr_err("Error creating memory pool\n");
382                 goto out_error;
383         }
384
385         return meta;
386
387 out_error:
388         vfree(meta->table);
389         kfree(meta);
390         return NULL;
391 }
392
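    /*
     * Take a reference on the device metadata; fails once the refcount
     * has already dropped to zero (i.e. the device is being reset).
     */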
393 static inline bool zram_meta_get(struct zram *zram)
394 {
395         if (atomic_inc_not_zero(&zram->refcount))
396                 return true;
397         return false;
398 }
399
400 static inline void zram_meta_put(struct zram *zram)
401 {
402         atomic_dec(&zram->refcount);
403 }
404
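    /*
     * Advance the page index and intra-page offset past this bio_vec,
     * moving to the next page when the segment reaches a page boundary.
     */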
405 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
406 {
407         if (*offset + bvec->bv_len >= PAGE_SIZE)
408                 (*index)++;
409         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
410 }
411
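    /* Return 1 if the page contains only zero bytes, scanning it word by word. */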
412 static int page_zero_filled(void *ptr)
413 {
414         unsigned int pos;
415         unsigned long *page;
416
417         page = (unsigned long *)ptr;
418
419         for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
420                 if (page[pos])
421                         return 0;
422         }
423
424         return 1;
425 }
426
427 static void handle_zero_page(struct bio_vec *bvec)
428 {
429         struct page *page = bvec->bv_page;
430         void *user_mem;
431
432         user_mem = kmap_atomic(page);
433         if (is_partial_io(bvec))
434                 memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
435         else
436                 clear_page(user_mem);
437         kunmap_atomic(user_mem);
438
439         flush_dcache_page(page);
440 }
441
442
443 /*
444  * To protect concurrent access to the same index entry, the
445  * caller should hold this table entry's bit_spinlock to
446  * indicate that the entry is being accessed.
447  */
448 static void zram_free_page(struct zram *zram, size_t index)
449 {
450         struct zram_meta *meta = zram->meta;
451         unsigned long handle = meta->table[index].handle;
452
453         if (unlikely(!handle)) {
454                 /*
455                  * No memory is allocated for zero-filled pages.
456                  * Simply clear the zero page flag.
457                  */
458                 if (zram_test_flag(meta, index, ZRAM_ZERO)) {
459                         zram_clear_flag(meta, index, ZRAM_ZERO);
460                         atomic64_dec(&zram->stats.zero_pages);
461                 }
462                 return;
463         }
464
465         zs_free(meta->mem_pool, handle);
466
467         atomic64_sub(zram_get_obj_size(meta, index),
468                         &zram->stats.compr_data_size);
469         atomic64_dec(&zram->stats.pages_stored);
470
471         meta->table[index].handle = 0;
472         zram_set_obj_size(meta, index, 0);
473 }
474
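    /*
     * Decompress the object stored at @index into @mem (one full page).
     * Unallocated and zero-filled slots are expanded to a cleared page.
     */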
475 static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
476 {
477         int ret = 0;
478         unsigned char *cmem;
479         struct zram_meta *meta = zram->meta;
480         unsigned long handle;
481         size_t size;
482
483         bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
484         handle = meta->table[index].handle;
485         size = zram_get_obj_size(meta, index);
486
487         if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
488                 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
489                 clear_page(mem);
490                 return 0;
491         }
492
493         cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
494         if (size == PAGE_SIZE)
495                 copy_page(mem, cmem);
496         else
497                 ret = zcomp_decompress(zram->comp, cmem, size, mem);
498         zs_unmap_object(meta->mem_pool, handle);
499         bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
500
501         /* Should NEVER happen. Return bio error if it does. */
502         if (unlikely(ret)) {
503                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
504                 return ret;
505         }
506
507         return 0;
508 }
509
510 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
511                           u32 index, int offset)
512 {
513         int ret;
514         struct page *page;
515         unsigned char *user_mem, *uncmem = NULL;
516         struct zram_meta *meta = zram->meta;
517         page = bvec->bv_page;
518
519         bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
520         if (unlikely(!meta->table[index].handle) ||
521                         zram_test_flag(meta, index, ZRAM_ZERO)) {
522                 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
523                 handle_zero_page(bvec);
524                 return 0;
525         }
526         bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
527
528         if (is_partial_io(bvec))
529                 /* Use a temporary buffer to decompress the page */
530                 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
531
532         user_mem = kmap_atomic(page);
533         if (!is_partial_io(bvec))
534                 uncmem = user_mem;
535
536         if (!uncmem) {
537                 pr_info("Unable to allocate temp memory\n");
538                 ret = -ENOMEM;
539                 goto out_cleanup;
540         }
541
542         ret = zram_decompress_page(zram, uncmem, index);
543         /* Should NEVER happen. Return bio error if it does. */
544         if (unlikely(ret))
545                 goto out_cleanup;
546
547         if (is_partial_io(bvec))
548                 memcpy(user_mem + bvec->bv_offset, uncmem + offset,
549                                 bvec->bv_len);
550
551         flush_dcache_page(page);
552         ret = 0;
553 out_cleanup:
554         kunmap_atomic(user_mem);
555         if (is_partial_io(bvec))
556                 kfree(uncmem);
557         return ret;
558 }
559
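    /*
     * Raise stats.max_used_pages to @pages if it is larger, using a
     * cmpxchg loop so a concurrently recorded maximum is never
     * overwritten with a smaller value.
     */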
560 static inline void update_used_max(struct zram *zram,
561                                         const unsigned long pages)
562 {
563         unsigned long old_max, cur_max;
564
565         old_max = atomic_long_read(&zram->stats.max_used_pages);
566
567         do {
568                 cur_max = old_max;
569                 if (pages > cur_max)
570                         old_max = atomic_long_cmpxchg(
571                                 &zram->stats.max_used_pages, cur_max, pages);
572         } while (old_max != cur_max);
573 }
574
575 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
576                            int offset)
577 {
578         int ret = 0;
579         size_t clen;
580         unsigned long handle;
581         struct page *page;
582         unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
583         struct zram_meta *meta = zram->meta;
584         struct zcomp_strm *zstrm;
585         bool locked = false;
586         unsigned long alloced_pages;
587
588         page = bvec->bv_page;
589         if (is_partial_io(bvec)) {
590                 /*
591                  * This is a partial I/O. We need to read the full page
592                  * before writing the changes.
593                  */
594                 uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
595                 if (!uncmem) {
596                         ret = -ENOMEM;
597                         goto out;
598                 }
599                 ret = zram_decompress_page(zram, uncmem, index);
600                 if (ret)
601                         goto out;
602         }
603
604         zstrm = zcomp_strm_find(zram->comp);
605         locked = true;
606         user_mem = kmap_atomic(page);
607
608         if (is_partial_io(bvec)) {
609                 memcpy(uncmem + offset, user_mem + bvec->bv_offset,
610                        bvec->bv_len);
611                 kunmap_atomic(user_mem);
612                 user_mem = NULL;
613         } else {
614                 uncmem = user_mem;
615         }
616
617         if (page_zero_filled(uncmem)) {
618                 if (user_mem)
619                         kunmap_atomic(user_mem);
620                 /* Free memory associated with this sector now. */
621                 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
622                 zram_free_page(zram, index);
623                 zram_set_flag(meta, index, ZRAM_ZERO);
624                 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
625
626                 atomic64_inc(&zram->stats.zero_pages);
627                 ret = 0;
628                 goto out;
629         }
630
631         ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
632         if (!is_partial_io(bvec)) {
633                 kunmap_atomic(user_mem);
634                 user_mem = NULL;
635                 uncmem = NULL;
636         }
637
638         if (unlikely(ret)) {
639                 pr_err("Compression failed! err=%d\n", ret);
640                 goto out;
641         }
642         src = zstrm->buffer;
643         if (unlikely(clen > max_zpage_size)) {
644                 clen = PAGE_SIZE;
645                 if (is_partial_io(bvec))
646                         src = uncmem;
647         }
648
649         handle = zs_malloc(meta->mem_pool, clen);
650         if (!handle) {
651                 pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
652                         index, clen);
653                 ret = -ENOMEM;
654                 goto out;
655         }
656
657         alloced_pages = zs_get_total_pages(meta->mem_pool);
658         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
659                 zs_free(meta->mem_pool, handle);
660                 ret = -ENOMEM;
661                 goto out;
662         }
663
664         update_used_max(zram, alloced_pages);
665
666         cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
667
668         if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
669                 src = kmap_atomic(page);
670                 copy_page(cmem, src);
671                 kunmap_atomic(src);
672         } else {
673                 memcpy(cmem, src, clen);
674         }
675
676         zcomp_strm_release(zram->comp, zstrm);
677         locked = false;
678         zs_unmap_object(meta->mem_pool, handle);
679
680         /*
681          * Free memory associated with this sector
682          * before overwriting unused sectors.
683          */
684         bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
685         zram_free_page(zram, index);
686
687         meta->table[index].handle = handle;
688         zram_set_obj_size(meta, index, clen);
689         bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
690
691         /* Update stats */
692         atomic64_add(clen, &zram->stats.compr_data_size);
693         atomic64_inc(&zram->stats.pages_stored);
694 out:
695         if (locked)
696                 zcomp_strm_release(zram->comp, zstrm);
697         if (is_partial_io(bvec))
698                 kfree(uncmem);
699         return ret;
700 }
701
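    /*
     * Read or write a single bio_vec, updating the generic I/O accounting
     * and the failed_reads/failed_writes counters on error.
     */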
702 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
703                         int offset, int rw)
704 {
705         unsigned long start_time = jiffies;
706         int ret;
707
708         generic_start_io_acct(rw, bvec->bv_len >> SECTOR_SHIFT,
709                         &zram->disk->part0);
710
711         if (rw == READ) {
712                 atomic64_inc(&zram->stats.num_reads);
713                 ret = zram_bvec_read(zram, bvec, index, offset);
714         } else {
715                 atomic64_inc(&zram->stats.num_writes);
716                 ret = zram_bvec_write(zram, bvec, index, offset);
717         }
718
719         generic_end_io_acct(rw, &zram->disk->part0, start_time);
720
721         if (unlikely(ret)) {
722                 if (rw == READ)
723                         atomic64_inc(&zram->stats.failed_reads);
724                 else
725                         atomic64_inc(&zram->stats.failed_writes);
726         }
727
728         return ret;
729 }
730
731 /*
732  * zram_bio_discard - handler for a discard request
733  * @index: physical block index in PAGE_SIZE units
734  * @offset: byte offset within physical block
735  */
736 static void zram_bio_discard(struct zram *zram, u32 index,
737                              int offset, struct bio *bio)
738 {
739         size_t n = bio->bi_iter.bi_size;
740         struct zram_meta *meta = zram->meta;
741
742         /*
743          * zram manages data in physical block size units. Because logical block
744          * size isn't identical with physical block size on some arch, we
745          * could get a discard request pointing to a specific offset within a
746          * certain physical block.  Although we can handle this request by
747          * reading that physiclal block and decompressing and partially zeroing
748          * and re-compressing and then re-storing it, this isn't reasonable
749          * because our intent with a discard request is to save memory.  So
750          * skipping this logical block is appropriate here.
751          */
752         if (offset) {
753                 if (n <= (PAGE_SIZE - offset))
754                         return;
755
756                 n -= (PAGE_SIZE - offset);
757                 index++;
758         }
759
760         while (n >= PAGE_SIZE) {
761                 bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
762                 zram_free_page(zram, index);
763                 bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
764                 atomic64_inc(&zram->stats.notify_free);
765                 index++;
766                 n -= PAGE_SIZE;
767         }
768 }
769
770 static void zram_reset_device(struct zram *zram)
771 {
772         struct zram_meta *meta;
773         struct zcomp *comp;
774         u64 disksize;
775
776         down_write(&zram->init_lock);
777
778         zram->limit_pages = 0;
779
780         if (!init_done(zram)) {
781                 up_write(&zram->init_lock);
782                 return;
783         }
784
785         meta = zram->meta;
786         comp = zram->comp;
787         disksize = zram->disksize;
788         /*
789          * The refcount will eventually drop to 0, so the r/w handlers
790          * cannot take on further I/O and will bail out when
791          * zram_meta_get() fails.
792          */
793         zram_meta_put(zram);
794         /*
795          * We want to free zram_meta in process context to avoid
796          * deadlock between reclaim path and any other locks.
797          */
798         wait_event(zram->io_done, atomic_read(&zram->refcount) == 0);
799
800         /* Reset stats */
801         memset(&zram->stats, 0, sizeof(zram->stats));
802         zram->disksize = 0;
803         zram->max_comp_streams = 1;
804
805         set_capacity(zram->disk, 0);
806         part_stat_set_all(&zram->disk->part0, 0);
807
808         up_write(&zram->init_lock);
809         /* I/O on all CPUs has completed, so it is safe to free now */
810         zram_meta_free(meta, disksize);
811         zcomp_destroy(comp);
812 }
813
814 static ssize_t disksize_store(struct device *dev,
815                 struct device_attribute *attr, const char *buf, size_t len)
816 {
817         u64 disksize;
818         struct zcomp *comp;
819         struct zram_meta *meta;
820         struct zram *zram = dev_to_zram(dev);
821         int err;
822
823         disksize = memparse(buf, NULL);
824         if (!disksize)
825                 return -EINVAL;
826
827         disksize = PAGE_ALIGN(disksize);
828         meta = zram_meta_alloc(zram->disk->first_minor, disksize);
829         if (!meta)
830                 return -ENOMEM;
831
832         comp = zcomp_create(zram->compressor, zram->max_comp_streams);
833         if (IS_ERR(comp)) {
834                 pr_info("Cannot initialise %s compressing backend\n",
835                                 zram->compressor);
836                 err = PTR_ERR(comp);
837                 goto out_free_meta;
838         }
839
840         down_write(&zram->init_lock);
841         if (init_done(zram)) {
842                 pr_info("Cannot change disksize for initialized device\n");
843                 err = -EBUSY;
844                 goto out_destroy_comp;
845         }
846
847         init_waitqueue_head(&zram->io_done);
848         atomic_set(&zram->refcount, 1);
849         zram->meta = meta;
850         zram->comp = comp;
851         zram->disksize = disksize;
852         set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
853         up_write(&zram->init_lock);
854
855         /*
856          * Revalidate the disk outside of init_lock to avoid a lockdep splat.
857          * It's okay because the disk's capacity is protected by init_lock,
858          * so revalidate_disk always sees the up-to-date capacity.
859          */
860         revalidate_disk(zram->disk);
861
862         return len;
863
864 out_destroy_comp:
865         up_write(&zram->init_lock);
866         zcomp_destroy(comp);
867 out_free_meta:
868         zram_meta_free(meta, disksize);
869         return err;
870 }
871
872 static ssize_t reset_store(struct device *dev,
873                 struct device_attribute *attr, const char *buf, size_t len)
874 {
875         int ret;
876         unsigned short do_reset;
877         struct zram *zram;
878         struct block_device *bdev;
879
880         zram = dev_to_zram(dev);
881         bdev = bdget_disk(zram->disk, 0);
882
883         if (!bdev)
884                 return -ENOMEM;
885
886         mutex_lock(&bdev->bd_mutex);
887         /* Do not reset an active device! */
888         if (bdev->bd_openers) {
889                 ret = -EBUSY;
890                 goto out;
891         }
892
893         ret = kstrtou16(buf, 10, &do_reset);
894         if (ret)
895                 goto out;
896
897         if (!do_reset) {
898                 ret = -EINVAL;
899                 goto out;
900         }
901
902         /* Make sure all pending I/O is finished */
903         fsync_bdev(bdev);
904         zram_reset_device(zram);
905
906         mutex_unlock(&bdev->bd_mutex);
907         revalidate_disk(zram->disk);
908         bdput(bdev);
909
910         return len;
911
912 out:
913         mutex_unlock(&bdev->bd_mutex);
914         bdput(bdev);
915         return ret;
916 }
917
918 static void __zram_make_request(struct zram *zram, struct bio *bio)
919 {
920         int offset, rw;
921         u32 index;
922         struct bio_vec bvec;
923         struct bvec_iter iter;
924
925         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
926         offset = (bio->bi_iter.bi_sector &
927                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
928
929         if (unlikely(bio->bi_rw & REQ_DISCARD)) {
930                 zram_bio_discard(zram, index, offset, bio);
931                 bio_endio(bio, 0);
932                 return;
933         }
934
935         rw = bio_data_dir(bio);
936         bio_for_each_segment(bvec, bio, iter) {
937                 int max_transfer_size = PAGE_SIZE - offset;
938
939                 if (bvec.bv_len > max_transfer_size) {
940                         /*
941                          * zram_bvec_rw() can only operate on a single
942                          * zram page. Split the bio vector.
943                          */
944                         struct bio_vec bv;
945
946                         bv.bv_page = bvec.bv_page;
947                         bv.bv_len = max_transfer_size;
948                         bv.bv_offset = bvec.bv_offset;
949
950                         if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0)
951                                 goto out;
952
953                         bv.bv_len = bvec.bv_len - max_transfer_size;
954                         bv.bv_offset += max_transfer_size;
955                         if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0)
956                                 goto out;
957                 } else
958                         if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0)
959                                 goto out;
960
961                 update_position(&index, &offset, &bvec);
962         }
963
964         set_bit(BIO_UPTODATE, &bio->bi_flags);
965         bio_endio(bio, 0);
966         return;
967
968 out:
969         bio_io_error(bio);
970 }
971
972 /*
973  * Handler function for all zram I/O requests.
974  */
975 static void zram_make_request(struct request_queue *queue, struct bio *bio)
976 {
977         struct zram *zram = queue->queuedata;
978
979         if (unlikely(!zram_meta_get(zram)))
980                 goto error;
981
982         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
983                                         bio->bi_iter.bi_size)) {
984                 atomic64_inc(&zram->stats.invalid_io);
985                 goto put_zram;
986         }
987
988         __zram_make_request(zram, bio);
989         zram_meta_put(zram);
990         return;
991 put_zram:
992         zram_meta_put(zram);
993 error:
994         bio_io_error(bio);
995 }
996
997 static void zram_slot_free_notify(struct block_device *bdev,
998                                 unsigned long index)
999 {
1000         struct zram *zram;
1001         struct zram_meta *meta;
1002
1003         zram = bdev->bd_disk->private_data;
1004         meta = zram->meta;
1005
1006         bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
1007         zram_free_page(zram, index);
1008         bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
1009         atomic64_inc(&zram->stats.notify_free);
1010 }
1011
1012 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1013                        struct page *page, int rw)
1014 {
1015         int offset, err = -EIO;
1016         u32 index;
1017         struct zram *zram;
1018         struct bio_vec bv;
1019
1020         zram = bdev->bd_disk->private_data;
1021         if (unlikely(!zram_meta_get(zram)))
1022                 goto out;
1023
1024         if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1025                 atomic64_inc(&zram->stats.invalid_io);
1026                 err = -EINVAL;
1027                 goto put_zram;
1028         }
1029
1030         index = sector >> SECTORS_PER_PAGE_SHIFT;
1031         offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1032
1033         bv.bv_page = page;
1034         bv.bv_len = PAGE_SIZE;
1035         bv.bv_offset = 0;
1036
1037         err = zram_bvec_rw(zram, &bv, index, offset, rw);
1038 put_zram:
1039         zram_meta_put(zram);
1040 out:
1041         /*
1042          * If the I/O fails, just return an error (i.e. non-zero) without
1043          * calling page_endio.
1044          * This makes the upper callers of rw_page (e.g. swap_readpage,
1045          * __swap_writepage) resubmit the I/O as a bio request, and
1046          * bio->bi_end_io then handles the error
1047          * (e.g. SetPageError, set_page_dirty and extra work).
1048          */
1049         if (err == 0)
1050                 page_endio(page, rw, 0);
1051         return err;
1052 }
1053
1054 static const struct block_device_operations zram_devops = {
1055         .swap_slot_free_notify = zram_slot_free_notify,
1056         .rw_page = zram_rw_page,
1057         .owner = THIS_MODULE
1058 };
1059
1060 static DEVICE_ATTR_WO(compact);
1061 static DEVICE_ATTR_RW(disksize);
1062 static DEVICE_ATTR_RO(initstate);
1063 static DEVICE_ATTR_WO(reset);
1064 static DEVICE_ATTR_RO(orig_data_size);
1065 static DEVICE_ATTR_RO(mem_used_total);
1066 static DEVICE_ATTR_RW(mem_limit);
1067 static DEVICE_ATTR_RW(mem_used_max);
1068 static DEVICE_ATTR_RW(max_comp_streams);
1069 static DEVICE_ATTR_RW(comp_algorithm);
1070
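     /* io_stat columns: failed_reads failed_writes invalid_io notify_free */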
1071 static ssize_t io_stat_show(struct device *dev,
1072                 struct device_attribute *attr, char *buf)
1073 {
1074         struct zram *zram = dev_to_zram(dev);
1075         ssize_t ret;
1076
1077         down_read(&zram->init_lock);
1078         ret = scnprintf(buf, PAGE_SIZE,
1079                         "%8llu %8llu %8llu %8llu\n",
1080                         (u64)atomic64_read(&zram->stats.failed_reads),
1081                         (u64)atomic64_read(&zram->stats.failed_writes),
1082                         (u64)atomic64_read(&zram->stats.invalid_io),
1083                         (u64)atomic64_read(&zram->stats.notify_free));
1084         up_read(&zram->init_lock);
1085
1086         return ret;
1087 }
1088
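     /*
      * mm_stat columns: orig_data_size compr_data_size mem_used_total
      * mem_limit mem_used_max zero_pages num_migrated
      */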
1089 static ssize_t mm_stat_show(struct device *dev,
1090                 struct device_attribute *attr, char *buf)
1091 {
1092         struct zram *zram = dev_to_zram(dev);
1093         u64 orig_size, mem_used = 0;
1094         long max_used;
1095         ssize_t ret;
1096
1097         down_read(&zram->init_lock);
1098         if (init_done(zram))
1099                 mem_used = zs_get_total_pages(zram->meta->mem_pool);
1100
1101         orig_size = atomic64_read(&zram->stats.pages_stored);
1102         max_used = atomic_long_read(&zram->stats.max_used_pages);
1103
1104         ret = scnprintf(buf, PAGE_SIZE,
1105                         "%8llu %8llu %8llu %8lu %8ld %8llu %8llu\n",
1106                         orig_size << PAGE_SHIFT,
1107                         (u64)atomic64_read(&zram->stats.compr_data_size),
1108                         mem_used << PAGE_SHIFT,
1109                         zram->limit_pages << PAGE_SHIFT,
1110                         max_used << PAGE_SHIFT,
1111                         (u64)atomic64_read(&zram->stats.zero_pages),
1112                         (u64)atomic64_read(&zram->stats.num_migrated));
1113         up_read(&zram->init_lock);
1114
1115         return ret;
1116 }
1117
1118 static DEVICE_ATTR_RO(io_stat);
1119 static DEVICE_ATTR_RO(mm_stat);
1120 ZRAM_ATTR_RO(num_reads);
1121 ZRAM_ATTR_RO(num_writes);
1122 ZRAM_ATTR_RO(failed_reads);
1123 ZRAM_ATTR_RO(failed_writes);
1124 ZRAM_ATTR_RO(invalid_io);
1125 ZRAM_ATTR_RO(notify_free);
1126 ZRAM_ATTR_RO(zero_pages);
1127 ZRAM_ATTR_RO(compr_data_size);
1128
1129 static struct attribute *zram_disk_attrs[] = {
1130         &dev_attr_disksize.attr,
1131         &dev_attr_initstate.attr,
1132         &dev_attr_reset.attr,
1133         &dev_attr_num_reads.attr,
1134         &dev_attr_num_writes.attr,
1135         &dev_attr_failed_reads.attr,
1136         &dev_attr_failed_writes.attr,
1137         &dev_attr_compact.attr,
1138         &dev_attr_invalid_io.attr,
1139         &dev_attr_notify_free.attr,
1140         &dev_attr_zero_pages.attr,
1141         &dev_attr_orig_data_size.attr,
1142         &dev_attr_compr_data_size.attr,
1143         &dev_attr_mem_used_total.attr,
1144         &dev_attr_mem_limit.attr,
1145         &dev_attr_mem_used_max.attr,
1146         &dev_attr_max_comp_streams.attr,
1147         &dev_attr_comp_algorithm.attr,
1148         &dev_attr_io_stat.attr,
1149         &dev_attr_mm_stat.attr,
1150         NULL,
1151 };
1152
1153 static struct attribute_group zram_disk_attr_group = {
1154         .attrs = zram_disk_attrs,
1155 };
1156
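     /* Allocate, initialise and register a new zram gendisk for @device_id. */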
1157 static int zram_add(int device_id)
1158 {
1159         struct zram *zram;
1160         struct request_queue *queue;
1161         int ret;
1162
1163         zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1164         if (!zram)
1165                 return -ENOMEM;
1166
1167         ret = idr_alloc(&zram_index_idr, zram, device_id,
1168                         device_id + 1, GFP_KERNEL);
1169         if (ret < 0)
1170                 goto out_free_dev;
1171
1172         init_rwsem(&zram->init_lock);
1173
1174         queue = blk_alloc_queue(GFP_KERNEL);
1175         if (!queue) {
1176                 pr_err("Error allocating disk queue for device %d\n",
1177                         device_id);
1178                 ret = -ENOMEM;
1179                 goto out_free_idr;
1180         }
1181
1182         blk_queue_make_request(queue, zram_make_request);
1183
1184         /* gendisk structure */
1185         zram->disk = alloc_disk(1);
1186         if (!zram->disk) {
1187                 pr_warn("Error allocating disk structure for device %d\n",
1188                         device_id);
1189                 ret = -ENOMEM;
1190                 goto out_free_queue;
1191         }
1192
1193         zram->disk->major = zram_major;
1194         zram->disk->first_minor = device_id;
1195         zram->disk->fops = &zram_devops;
1196         zram->disk->queue = queue;
1197         zram->disk->queue->queuedata = zram;
1198         zram->disk->private_data = zram;
1199         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1200
1201         /* Actual capacity is set via sysfs (/sys/block/zram<id>/disksize) */
1202         set_capacity(zram->disk, 0);
1203         /* zram devices somewhat resemble non-rotational disks */
1204         queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
1205         queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1206         /*
1207          * Ensure that we always get PAGE_SIZE-aligned
1208          * and n*PAGE_SIZE-sized I/O requests.
1209          */
1210         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1211         blk_queue_logical_block_size(zram->disk->queue,
1212                                         ZRAM_LOGICAL_BLOCK_SIZE);
1213         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1214         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1215         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1216         zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
1217         /*
1218          * zram_bio_discard() will clear all logical blocks if logical block
1219          * size is identical to the physical block size (PAGE_SIZE). But if it is
1220          * different, we will skip discarding some parts of logical blocks in
1221          * the part of the request range which isn't aligned to physical block
1222          * size.  So we can't ensure that all discarded logical blocks are
1223          * zeroed.
1224          */
1225         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1226                 zram->disk->queue->limits.discard_zeroes_data = 1;
1227         else
1228                 zram->disk->queue->limits.discard_zeroes_data = 0;
1229         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
1230
1231         add_disk(zram->disk);
1232
1233         ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
1234                                 &zram_disk_attr_group);
1235         if (ret < 0) {
1236                 pr_warn("Error creating sysfs group\n");
1237                 goto out_free_disk;
1238         }
1239         strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1240         zram->meta = NULL;
1241         zram->max_comp_streams = 1;
1242         return 0;
1243
1244 out_free_disk:
1245         del_gendisk(zram->disk);
1246         put_disk(zram->disk);
1247 out_free_queue:
1248         blk_cleanup_queue(queue);
1249 out_free_idr:
1250         idr_remove(&zram_index_idr, device_id);
1251 out_free_dev:
1252         kfree(zram);
1253         return ret;
1254 }
1255
1256 static void zram_remove(struct zram *zram)
1257 {
1258         /*
1259          * Remove sysfs first, so no one will perform a disksize
1260          * store while we destroy the device.
1261          */
1262         sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
1263                         &zram_disk_attr_group);
1264
1265         zram_reset_device(zram);
1266         idr_remove(&zram_index_idr, zram->disk->first_minor);
1267         blk_cleanup_queue(zram->disk->queue);
1268         del_gendisk(zram->disk);
1269         put_disk(zram->disk);
1270         kfree(zram);
1271 }
1272
1273 static int zram_remove_cb(int id, void *ptr, void *data)
1274 {
1275         zram_remove(ptr);
1276         return 0;
1277 }
1278
1279 static void destroy_devices(void)
1280 {
1281         idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
1282         idr_destroy(&zram_index_idr);
1283         unregister_blkdev(zram_major, "zram");
1284         pr_info("Destroyed device(s)\n");
1285 }
1286
1287 static int __init zram_init(void)
1288 {
1289         int ret, dev_id;
1290
1291         if (num_devices > max_num_devices) {
1292                 pr_warn("Invalid value for num_devices: %u\n",
1293                                 num_devices);
1294                 return -EINVAL;
1295         }
1296
1297         zram_major = register_blkdev(0, "zram");
1298         if (zram_major <= 0) {
1299                 pr_warn("Unable to get major number\n");
1300                 return -EBUSY;
1301         }
1302
1303         for (dev_id = 0; dev_id < num_devices; dev_id++) {
1304                 ret = zram_add(dev_id);
1305                 if (ret != 0)
1306                         goto out_error;
1307         }
1308
1309         pr_info("Created %u device(s)\n", num_devices);
1310         return 0;
1311
1312 out_error:
1313         destroy_devices();
1314         return ret;
1315 }
1316
1317 static void __exit zram_exit(void)
1318 {
1319         destroy_devices();
1320 }
1321
1322 module_init(zram_init);
1323 module_exit(zram_exit);
1324
1325 module_param(num_devices, uint, 0);
1326 MODULE_PARM_DESC(num_devices, "Number of zram devices");
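     /* Example: "modprobe zram num_devices=4" creates /dev/zram0 .. /dev/zram3. */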
1327
1328 MODULE_LICENSE("Dual BSD/GPL");
1329 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1330 MODULE_DESCRIPTION("Compressed RAM Block Device");