drivers/lightnvm/rrpc.c
1 /*
2  * Copyright (C) 2015 IT University of Copenhagen
3  * Initial release: Matias Bjorling <m@bjorling.me>
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version
7  * 2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs.
15  */
16
17 #include "rrpc.h"
18
19 static struct kmem_cache *rrpc_gcb_cache, *rrpc_rq_cache;
20 static DECLARE_RWSEM(rrpc_lock);
21
22 static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
23                                 struct nvm_rq *rqd, unsigned long flags);
24
25 #define rrpc_for_each_lun(rrpc, rlun, i) \
26                 for ((i) = 0, rlun = &(rrpc)->luns[0]; \
27                         (i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])
28
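/* Mark the physical page backing @a as invalid in its block's bitmap and
 * clear the corresponding reverse (physical-to-logical) map entry. The
 * caller must hold rrpc->rev_lock; the block lock is taken here for the
 * bitmap update.
 */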
29 static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
30 {
31         struct rrpc_block *rblk = a->rblk;
32         unsigned int pg_offset;
33
34         lockdep_assert_held(&rrpc->rev_lock);
35
36         if (a->addr == ADDR_EMPTY || !rblk)
37                 return;
38
39         spin_lock(&rblk->lock);
40
41         div_u64_rem(a->addr, rrpc->dev->pgs_per_blk, &pg_offset);
42         WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
43         rblk->nr_invalid_pages++;
44
45         spin_unlock(&rblk->lock);
46
47         rrpc->rev_trans_map[a->addr - rrpc->poffset].addr = ADDR_EMPTY;
48 }
49
50 static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
51                                                                 unsigned len)
52 {
53         sector_t i;
54
55         spin_lock(&rrpc->rev_lock);
56         for (i = slba; i < slba + len; i++) {
57                 struct rrpc_addr *gp = &rrpc->trans_map[i];
58
59                 rrpc_page_invalidate(rrpc, gp);
60                 gp->rblk = NULL;
61         }
62         spin_unlock(&rrpc->rev_lock);
63 }
64
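/* Allocate a request and try to lock the logical range [laddr, laddr + pages)
 * against other inflight I/O. Returns NULL if the range is already locked,
 * or ERR_PTR(-ENOMEM) if no request could be allocated.
 */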
65 static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
66                                         sector_t laddr, unsigned int pages)
67 {
68         struct nvm_rq *rqd;
69         struct rrpc_inflight_rq *inf;
70
71         rqd = mempool_alloc(rrpc->rq_pool, GFP_ATOMIC);
72         if (!rqd)
73                 return ERR_PTR(-ENOMEM);
74
75         inf = rrpc_get_inflight_rq(rqd);
76         if (rrpc_lock_laddr(rrpc, laddr, pages, inf)) {
77                 mempool_free(rqd, rrpc->rq_pool);
78                 return NULL;
79         }
80
81         return rqd;
82 }
83
84 static void rrpc_inflight_laddr_release(struct rrpc *rrpc, struct nvm_rq *rqd)
85 {
86         struct rrpc_inflight_rq *inf = rrpc_get_inflight_rq(rqd);
87
88         rrpc_unlock_laddr(rrpc, inf);
89
90         mempool_free(rqd, rrpc->rq_pool);
91 }
92
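/* Handle a discard bio: lock the affected logical range, mark the mapped
 * physical pages invalid and drop their forward mappings, then release the
 * range again.
 */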
93 static void rrpc_discard(struct rrpc *rrpc, struct bio *bio)
94 {
95         sector_t slba = bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
96         sector_t len = bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE;
97         struct nvm_rq *rqd;
98
99         do {
100                 rqd = rrpc_inflight_laddr_acquire(rrpc, slba, len);
101                 schedule();
102         } while (!rqd);
103
104         if (IS_ERR(rqd)) {
105                 pr_err("rrpc: unable to acquire inflight IO\n");
106                 bio_io_error(bio);
107                 return;
108         }
109
110         rrpc_invalidate_range(rrpc, slba, len);
111         rrpc_inflight_laddr_release(rrpc, rqd);
112 }
113
114 static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
115 {
116         return (rblk->next_page == rrpc->dev->pgs_per_blk);
117 }
118
119 static u64 block_to_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
120 {
121         struct nvm_block *blk = rblk->parent;
122
123         return blk->id * rrpc->dev->pgs_per_blk;
124 }
125
126 static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_dev *dev, u64 addr)
127 {
128         struct ppa_addr paddr;
129
130         paddr.ppa = addr;
131         return __linear_to_generic_addr(dev, paddr);
132 }
133
134 /* requires rlun->lock to be held */
135 static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *rblk)
136 {
137         struct rrpc *rrpc = rlun->rrpc;
138
139         BUG_ON(!rblk);
140
141         if (rlun->cur) {
142                 spin_lock(&rlun->cur->lock);
143                 WARN_ON(!block_is_full(rrpc, rlun->cur));
144                 spin_unlock(&rlun->cur->lock);
145         }
146         rlun->cur = rblk;
147 }
148
149 static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
150                                                         unsigned long flags)
151 {
152         struct nvm_block *blk;
153         struct rrpc_block *rblk;
154
155         blk = nvm_get_blk(rrpc->dev, rlun->parent, 0);
156         if (!blk)
157                 return NULL;
158
159         rblk = &rlun->blocks[blk->id];
160         blk->priv = rblk;
161
162         bitmap_zero(rblk->invalid_pages, rrpc->dev->pgs_per_blk);
163         rblk->next_page = 0;
164         rblk->nr_invalid_pages = 0;
165         atomic_set(&rblk->data_cmnt_size, 0);
166
167         return rblk;
168 }
169
170 static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
171 {
172         nvm_put_blk(rrpc->dev, rblk->parent);
173 }
174
175 static struct rrpc_lun *get_next_lun(struct rrpc *rrpc)
176 {
177         int next = atomic_inc_return(&rrpc->next_lun);
178
179         return &rrpc->luns[next % rrpc->nr_luns];
180 }
181
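/* Queue the garbage collection work item of every LUN managed by this
 * target.
 */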
182 static void rrpc_gc_kick(struct rrpc *rrpc)
183 {
184         struct rrpc_lun *rlun;
185         unsigned int i;
186
187         for (i = 0; i < rrpc->nr_luns; i++) {
188                 rlun = &rrpc->luns[i];
189                 queue_work(rrpc->krqd_wq, &rlun->ws_gc);
190         }
191 }
192
193 /*
194  * Timer-driven GC: kick garbage collection on every LUN each interval.
195  */
196 static void rrpc_gc_timer(unsigned long data)
197 {
198         struct rrpc *rrpc = (struct rrpc *)data;
199
200         rrpc_gc_kick(rrpc);
201         mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10));
202 }
203
204 static void rrpc_end_sync_bio(struct bio *bio)
205 {
206         struct completion *waiting = bio->bi_private;
207
208         if (bio->bi_error)
209                 pr_err("nvm: gc request failed (%d).\n", bio->bi_error);
210
211         complete(waiting);
212 }
213
214 /*
215  * rrpc_move_valid_pages -- migrate live data off the block
216  * @rrpc: the 'rrpc' structure
217  * @rblk: the block from which to migrate live pages
218  *
219  * Description:
220  *   GC algorithms may call this function to migrate remaining live
221  *   pages off the block prior to erasing it. This function blocks
222  *   further execution until the operation is complete.
223  */
224 static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
225 {
226         struct request_queue *q = rrpc->dev->q;
227         struct rrpc_rev_addr *rev;
228         struct nvm_rq *rqd;
229         struct bio *bio;
230         struct page *page;
231         int slot;
232         int nr_pgs_per_blk = rrpc->dev->pgs_per_blk;
233         u64 phys_addr;
234         DECLARE_COMPLETION_ONSTACK(wait);
235
236         if (bitmap_full(rblk->invalid_pages, nr_pgs_per_blk))
237                 return 0;
238
239         bio = bio_alloc(GFP_NOIO, 1);
240         if (!bio) {
241                 pr_err("nvm: could not alloc bio to gc\n");
242                 return -ENOMEM;
243         }
244
245         page = mempool_alloc(rrpc->page_pool, GFP_NOIO);
246
247         while ((slot = find_first_zero_bit(rblk->invalid_pages,
248                                             nr_pgs_per_blk)) < nr_pgs_per_blk) {
249
250                 /* Lock laddr */
251                 phys_addr = (rblk->parent->id * nr_pgs_per_blk) + slot;
252
253 try:
254                 spin_lock(&rrpc->rev_lock);
255                 /* Get logical address from physical to logical table */
256                 rev = &rrpc->rev_trans_map[phys_addr - rrpc->poffset];
257                 /* already updated by previous regular write */
258                 if (rev->addr == ADDR_EMPTY) {
259                         spin_unlock(&rrpc->rev_lock);
260                         continue;
261                 }
262
263                 rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
264                 if (IS_ERR_OR_NULL(rqd)) {
265                         spin_unlock(&rrpc->rev_lock);
266                         schedule();
267                         goto try;
268                 }
269
270                 spin_unlock(&rrpc->rev_lock);
271
272                 /* Perform read to do GC */
273                 bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
274                 bio->bi_rw = READ;
275                 bio->bi_private = &wait;
276                 bio->bi_end_io = rrpc_end_sync_bio;
277
278                 /* TODO: may fail when EXP_PG_SIZE > PAGE_SIZE */
279                 bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
280
281                 if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
282                         pr_err("rrpc: gc read failed.\n");
283                         rrpc_inflight_laddr_release(rrpc, rqd);
284                         goto finished;
285                 }
286                 wait_for_completion_io(&wait);
287
288                 bio_reset(bio);
289                 reinit_completion(&wait);
290
291                 bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
292                 bio->bi_rw = WRITE;
293                 bio->bi_private = &wait;
294                 bio->bi_end_io = rrpc_end_sync_bio;
295
296                 bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);
297
298                 /* turn the command around and write the data back to a new
299                  * address
300                  */
301                 if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
302                         pr_err("rrpc: gc write failed.\n");
303                         rrpc_inflight_laddr_release(rrpc, rqd);
304                         goto finished;
305                 }
306                 wait_for_completion_io(&wait);
307
308                 rrpc_inflight_laddr_release(rrpc, rqd);
309
310                 bio_reset(bio);
311         }
312
313 finished:
314         mempool_free(page, rrpc->page_pool);
315         bio_put(bio);
316
317         if (!bitmap_full(rblk->invalid_pages, nr_pgs_per_blk)) {
318                 pr_err("nvm: failed to garbage collect block\n");
319                 return -EIO;
320         }
321
322         return 0;
323 }
324
325 static void rrpc_block_gc(struct work_struct *work)
326 {
327         struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
328                                                                         ws_gc);
329         struct rrpc *rrpc = gcb->rrpc;
330         struct rrpc_block *rblk = gcb->rblk;
331         struct nvm_dev *dev = rrpc->dev;
332
333         pr_debug("nvm: block '%lu' being reclaimed\n", rblk->parent->id);
334
335         if (rrpc_move_valid_pages(rrpc, rblk))
336                 goto done;
337
338         nvm_erase_blk(dev, rblk->parent);
339         rrpc_put_blk(rrpc, rblk);
340 done:
341         mempool_free(gcb, rrpc->gcb_pool);
342 }
343
344 /* The block with the highest number of invalid pages will be at the
345  * beginning of the list.
346  */
347 static struct rrpc_block *rblock_max_invalid(struct rrpc_block *ra,
348                                                         struct rrpc_block *rb)
349 {
350         if (ra->nr_invalid_pages == rb->nr_invalid_pages)
351                 return ra;
352
353         return (ra->nr_invalid_pages < rb->nr_invalid_pages) ? rb : ra;
354 }
355
356 /* Linearly find the block with the highest number of invalid pages.
357  * Requires lun->lock to be held.
358  */
359 static struct rrpc_block *block_prio_find_max(struct rrpc_lun *rlun)
360 {
361         struct list_head *prio_list = &rlun->prio_list;
362         struct rrpc_block *rblock, *max;
363
364         BUG_ON(list_empty(prio_list));
365
366         max = list_first_entry(prio_list, struct rrpc_block, prio);
367         list_for_each_entry(rblock, prio_list, prio)
368                 max = rblock_max_invalid(max, rblock);
369
370         return max;
371 }
372
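/* Per-LUN GC work item: while the LUN is below its free-block target
 * (blks_per_lun / GC_LIMIT_INVERSE, but at least nr_luns), pick the block
 * with the most invalid pages from the priority list and queue it for
 * reclaim by rrpc_block_gc().
 */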
373 static void rrpc_lun_gc(struct work_struct *work)
374 {
375         struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc);
376         struct rrpc *rrpc = rlun->rrpc;
377         struct nvm_lun *lun = rlun->parent;
378         struct rrpc_block_gc *gcb;
379         unsigned int nr_blocks_need;
380
381         nr_blocks_need = rrpc->dev->blks_per_lun / GC_LIMIT_INVERSE;
382
383         if (nr_blocks_need < rrpc->nr_luns)
384                 nr_blocks_need = rrpc->nr_luns;
385
386         spin_lock(&lun->lock);
387         while (nr_blocks_need > lun->nr_free_blocks &&
388                                         !list_empty(&rlun->prio_list)) {
389                 struct rrpc_block *rblock = block_prio_find_max(rlun);
390                 struct nvm_block *block = rblock->parent;
391
392                 if (!rblock->nr_invalid_pages)
393                         break;
394
395                 list_del_init(&rblock->prio);
396
397                 BUG_ON(!block_is_full(rrpc, rblock));
398
399                 pr_debug("rrpc: selected block '%lu' for GC\n", block->id);
400
401                 gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
402                 if (!gcb)
403                         break;
404
405                 gcb->rrpc = rrpc;
406                 gcb->rblk = rblock;
407                 INIT_WORK(&gcb->ws_gc, rrpc_block_gc);
408
409                 queue_work(rrpc->kgc_wq, &gcb->ws_gc);
410
411                 nr_blocks_need--;
412         }
413         spin_unlock(&lun->lock);
414
415         /* TODO: Hint that request queue can be started again */
416 }
417
418 static void rrpc_gc_queue(struct work_struct *work)
419 {
420         struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
421                                                                         ws_gc);
422         struct rrpc *rrpc = gcb->rrpc;
423         struct rrpc_block *rblk = gcb->rblk;
424         struct nvm_lun *lun = rblk->parent->lun;
425         struct rrpc_lun *rlun = &rrpc->luns[lun->id - rrpc->lun_offset];
426
427         spin_lock(&rlun->lock);
428         list_add_tail(&rblk->prio, &rlun->prio_list);
429         spin_unlock(&rlun->lock);
430
431         mempool_free(gcb, rrpc->gcb_pool);
432         pr_debug("nvm: block '%lu' is full, allow GC (sched)\n",
433                                                         rblk->parent->id);
434 }
435
436 static const struct block_device_operations rrpc_fops = {
437         .owner          = THIS_MODULE,
438 };
439
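/* Pick the LUN for the next write. Regular writes use plain round-robin;
 * GC writes go to the LUN that currently has the most free blocks.
 */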
440 static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
441 {
442         unsigned int i;
443         struct rrpc_lun *rlun, *max_free;
444
445         if (!is_gc)
446                 return get_next_lun(rrpc);
447
448         /* During GC we ignore the round-robin order; instead we want to
449          * keep the number of free blocks even across the LUNs.
450          */
451         max_free = &rrpc->luns[0];
452         /* Prevent a GC-ing LUN from devouring the pages of a LUN with
453          * few free blocks. We don't take the lock as we only need an
454          * estimate.
455          */
456         rrpc_for_each_lun(rrpc, rlun, i) {
457                 if (rlun->parent->nr_free_blocks >
458                                         max_free->parent->nr_free_blocks)
459                         max_free = rlun;
460         }
461
462         return max_free;
463 }
464
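/* Point logical address @laddr at the new physical page @paddr in @rblk.
 * Any previous mapping is invalidated first, and the reverse map entry for
 * @paddr is set to @laddr. Both maps are updated under rrpc->rev_lock.
 */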
465 static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
466                                         struct rrpc_block *rblk, u64 paddr)
467 {
468         struct rrpc_addr *gp;
469         struct rrpc_rev_addr *rev;
470
471         BUG_ON(laddr >= rrpc->nr_pages);
472
473         gp = &rrpc->trans_map[laddr];
474         spin_lock(&rrpc->rev_lock);
475         if (gp->rblk)
476                 rrpc_page_invalidate(rrpc, gp);
477
478         gp->addr = paddr;
479         gp->rblk = rblk;
480
481         rev = &rrpc->rev_trans_map[gp->addr - rrpc->poffset];
482         rev->addr = laddr;
483         spin_unlock(&rrpc->rev_lock);
484
485         return gp;
486 }
487
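/* Allocate the next free page in @rblk under the block lock. Returns the
 * device page address, or ADDR_EMPTY if the block is already full.
 */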
488 static u64 rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
489 {
490         u64 addr = ADDR_EMPTY;
491
492         spin_lock(&rblk->lock);
493         if (block_is_full(rrpc, rblk))
494                 goto out;
495
496         addr = block_to_addr(rrpc, rblk) + rblk->next_page;
497
498         rblk->next_page++;
499 out:
500         spin_unlock(&rblk->lock);
501         return addr;
502 }
503
504 /* Simple round-robin logical-to-physical address translation.
505  *
506  * Retrieve the mapping using the active append point, then update the append
507  * point for the next write to the disk.
508  *
509  * Returns the rrpc_addr with the physical address and block, or NULL when no
510  * page could be allocated.
511  */
512 static struct rrpc_addr *rrpc_map_page(struct rrpc *rrpc, sector_t laddr,
513                                                                 int is_gc)
514 {
515         struct rrpc_lun *rlun;
516         struct rrpc_block *rblk;
517         struct nvm_lun *lun;
518         u64 paddr;
519
520         rlun = rrpc_get_lun_rr(rrpc, is_gc);
521         lun = rlun->parent;
522
523         if (!is_gc && lun->nr_free_blocks < rrpc->nr_luns * 4)
524                 return NULL;
525
526         spin_lock(&rlun->lock);
527
528         rblk = rlun->cur;
529 retry:
530         paddr = rrpc_alloc_addr(rrpc, rblk);
531
532         if (paddr == ADDR_EMPTY) {
533                 rblk = rrpc_get_blk(rrpc, rlun, 0);
534                 if (rblk) {
535                         rrpc_set_lun_cur(rlun, rblk);
536                         goto retry;
537                 }
538
539                 if (is_gc) {
540                         /* retry from emergency gc block */
541                         paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur);
542                         if (paddr == ADDR_EMPTY) {
543                                 rblk = rrpc_get_blk(rrpc, rlun, 1);
544                                 if (!rblk) {
545                                         pr_err("rrpc: no more blocks\n");
546                                         goto err;
547                                 }
548
549                                 rlun->gc_cur = rblk;
550                                 paddr = rrpc_alloc_addr(rrpc, rlun->gc_cur);
551                         }
552                         rblk = rlun->gc_cur;
553                 }
554         }
555
556         spin_unlock(&rlun->lock);
557         return rrpc_update_map(rrpc, laddr, rblk, paddr);
558 err:
559         spin_unlock(&rlun->lock);
560         return NULL;
561 }
562
563 static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
564 {
565         struct rrpc_block_gc *gcb;
566
567         gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
568         if (!gcb) {
569                 pr_err("rrpc: unable to queue block for gc.\n");
570                 return;
571         }
572
573         gcb->rrpc = rrpc;
574         gcb->rblk = rblk;
575
576         INIT_WORK(&gcb->ws_gc, rrpc_gc_queue);
577         queue_work(rrpc->kgc_wq, &gcb->ws_gc);
578 }
579
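/* Write completion: count the pages committed to each block and, once every
 * page of a block has been written, queue the block on its LUN's GC priority
 * list via rrpc_run_gc().
 */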
580 static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
581                                                 sector_t laddr, uint8_t npages)
582 {
583         struct rrpc_addr *p;
584         struct rrpc_block *rblk;
585         struct nvm_lun *lun;
586         int cmnt_size, i;
587
588         for (i = 0; i < npages; i++) {
589                 p = &rrpc->trans_map[laddr + i];
590                 rblk = p->rblk;
591                 lun = rblk->parent->lun;
592
593                 cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
594                 if (unlikely(cmnt_size == rrpc->dev->pgs_per_blk))
595                         rrpc_run_gc(rrpc, rblk);
596         }
597 }
598
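/* I/O completion callback. Writes update the per-block commit counters; for
 * non-GC requests the logical range lock is released and the bio reference,
 * PPA list, metadata and request are freed. GC requests are cleaned up by
 * their submitter.
 */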
599 static int rrpc_end_io(struct nvm_rq *rqd, int error)
600 {
601         struct rrpc *rrpc = container_of(rqd->ins, struct rrpc, instance);
602         struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
603         uint8_t npages = rqd->nr_pages;
604         sector_t laddr = rrpc_get_laddr(rqd->bio) - npages;
605
606         if (bio_data_dir(rqd->bio) == WRITE)
607                 rrpc_end_io_write(rrpc, rrqd, laddr, npages);
608
609         if (rrqd->flags & NVM_IOTYPE_GC)
610                 return 0;
611
612         rrpc_unlock_rq(rrpc, rqd);
613         bio_put(rqd->bio);
614
615         if (npages > 1)
616                 nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
617         if (rqd->metadata)
618                 nvm_dev_dma_free(rrpc->dev, rqd->metadata, rqd->dma_metadata);
619
620         mempool_free(rqd, rrpc->rq_pool);
621
622         return 0;
623 }
624
625 static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
626                         struct nvm_rq *rqd, unsigned long flags, int npages)
627 {
628         struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
629         struct rrpc_addr *gp;
630         sector_t laddr = rrpc_get_laddr(bio);
631         int is_gc = flags & NVM_IOTYPE_GC;
632         int i;
633
634         if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
635                 nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
636                 return NVM_IO_REQUEUE;
637         }
638
639         for (i = 0; i < npages; i++) {
640                 /* We assume that mapping occurs at 4KB granularity */
641                 BUG_ON(!(laddr + i >= 0 && laddr + i < rrpc->nr_pages));
642                 gp = &rrpc->trans_map[laddr + i];
643
644                 if (gp->rblk) {
645                         rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
646                                                                 gp->addr);
647                 } else {
648                         BUG_ON(is_gc);
649                         rrpc_unlock_laddr(rrpc, r);
650                         nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
651                                                         rqd->dma_ppa_list);
652                         return NVM_IO_DONE;
653                 }
654         }
655
656         rqd->opcode = NVM_OP_HBREAD;
657
658         return NVM_IO_OK;
659 }
660
661 static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
662                                                         unsigned long flags)
663 {
664         struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
665         int is_gc = flags & NVM_IOTYPE_GC;
666         sector_t laddr = rrpc_get_laddr(bio);
667         struct rrpc_addr *gp;
668
669         if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
670                 return NVM_IO_REQUEUE;
671
672         BUG_ON(!(laddr >= 0 && laddr < rrpc->nr_pages));
673         gp = &rrpc->trans_map[laddr];
674
675         if (gp->rblk) {
676                 rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp->addr);
677         } else {
678                 BUG_ON(is_gc);
679                 rrpc_unlock_rq(rrpc, rqd);
680                 return NVM_IO_DONE;
681         }
682
683         rqd->opcode = NVM_OP_HBREAD;
684         rrqd->addr = gp;
685
686         return NVM_IO_OK;
687 }
688
689 static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
690                         struct nvm_rq *rqd, unsigned long flags, int npages)
691 {
692         struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
693         struct rrpc_addr *p;
694         sector_t laddr = rrpc_get_laddr(bio);
695         int is_gc = flags & NVM_IOTYPE_GC;
696         int i;
697
698         if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
699                 nvm_dev_dma_free(rrpc->dev, rqd->ppa_list, rqd->dma_ppa_list);
700                 return NVM_IO_REQUEUE;
701         }
702
703         for (i = 0; i < npages; i++) {
704                 /* We assume that mapping occurs at 4KB granularity */
705                 p = rrpc_map_page(rrpc, laddr + i, is_gc);
706                 if (!p) {
707                         BUG_ON(is_gc);
708                         rrpc_unlock_laddr(rrpc, r);
709                         nvm_dev_dma_free(rrpc->dev, rqd->ppa_list,
710                                                         rqd->dma_ppa_list);
711                         rrpc_gc_kick(rrpc);
712                         return NVM_IO_REQUEUE;
713                 }
714
715                 rqd->ppa_list[i] = rrpc_ppa_to_gaddr(rrpc->dev,
716                                                                 p->addr);
717         }
718
719         rqd->opcode = NVM_OP_HBWRITE;
720
721         return NVM_IO_OK;
722 }
723
724 static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
725                                 struct nvm_rq *rqd, unsigned long flags)
726 {
727         struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
728         struct rrpc_addr *p;
729         int is_gc = flags & NVM_IOTYPE_GC;
730         sector_t laddr = rrpc_get_laddr(bio);
731
732         if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
733                 return NVM_IO_REQUEUE;
734
735         p = rrpc_map_page(rrpc, laddr, is_gc);
736         if (!p) {
737                 BUG_ON(is_gc);
738                 rrpc_unlock_rq(rrpc, rqd);
739                 rrpc_gc_kick(rrpc);
740                 return NVM_IO_REQUEUE;
741         }
742
743         rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, p->addr);
744         rqd->opcode = NVM_OP_HBWRITE;
745         rrqd->addr = p;
746
747         return NVM_IO_OK;
748 }
749
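/* Prepare a request for submission: multi-page requests get a DMA-able PPA
 * list and are routed to the ppalist read/write helpers; single-page
 * requests map straight into rqd->ppa_addr.
 */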
750 static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio,
751                         struct nvm_rq *rqd, unsigned long flags, uint8_t npages)
752 {
753         if (npages > 1) {
754                 rqd->ppa_list = nvm_dev_dma_alloc(rrpc->dev, GFP_KERNEL,
755                                                         &rqd->dma_ppa_list);
756                 if (!rqd->ppa_list) {
757                         pr_err("rrpc: not able to allocate ppa list\n");
758                         return NVM_IO_ERR;
759                 }
760
761                 if (bio_rw(bio) == WRITE)
762                         return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags,
763                                                                         npages);
764
765                 return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages);
766         }
767
768         if (bio_rw(bio) == WRITE)
769                 return rrpc_write_rq(rrpc, bio, rqd, flags);
770
771         return rrpc_read_rq(rrpc, bio, rqd, flags);
772 }
773
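/* Map a bio onto @rqd and submit it to the device. The bio must cover at
 * least one device sector and must not exceed the device's maximum request
 * size.
 */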
774 static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
775                                 struct nvm_rq *rqd, unsigned long flags)
776 {
777         int err;
778         struct rrpc_rq *rrq = nvm_rq_to_pdu(rqd);
779         uint8_t nr_pages = rrpc_get_pages(bio);
780         int bio_size = bio_sectors(bio) << 9;
781
782         if (bio_size < rrpc->dev->sec_size)
783                 return NVM_IO_ERR;
784         else if (bio_size > rrpc->dev->max_rq_size)
785                 return NVM_IO_ERR;
786
787         err = rrpc_setup_rq(rrpc, bio, rqd, flags, nr_pages);
788         if (err)
789                 return err;
790
791         bio_get(bio);
792         rqd->bio = bio;
793         rqd->ins = &rrpc->instance;
794         rqd->nr_pages = nr_pages;
795         rrq->flags = flags;
796
797         err = nvm_submit_io(rrpc->dev, rqd);
798         if (err) {
799                 pr_err("rrpc: I/O submission failed: %d\n", err);
800                 return NVM_IO_ERR;
801         }
802
803         return NVM_IO_OK;
804 }
805
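/* make_request entry point. Discards are handled synchronously; other bios
 * are wrapped in an nvm_rq and submitted. Bios that come back with
 * NVM_IO_REQUEUE are parked on the requeue list and resubmitted from the
 * rrpc_requeue() work item.
 */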
806 static void rrpc_make_rq(struct request_queue *q, struct bio *bio)
807 {
808         struct rrpc *rrpc = q->queuedata;
809         struct nvm_rq *rqd;
810         int err;
811
812         if (bio->bi_rw & REQ_DISCARD) {
813                 rrpc_discard(rrpc, bio);
814                 return;
815         }
816
817         rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
818         if (!rqd) {
819                 pr_err_ratelimited("rrpc: not able to queue bio.\n");
820                 bio_io_error(bio);
821                 return;
822         }
823         memset(rqd, 0, sizeof(struct nvm_rq));
824
825         err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE);
826         switch (err) {
827         case NVM_IO_OK:
828                 return;
829         case NVM_IO_ERR:
830                 bio_io_error(bio);
831                 break;
832         case NVM_IO_DONE:
833                 bio_endio(bio);
834                 break;
835         case NVM_IO_REQUEUE:
836                 spin_lock(&rrpc->bio_lock);
837                 bio_list_add(&rrpc->requeue_bios, bio);
838                 spin_unlock(&rrpc->bio_lock);
839                 queue_work(rrpc->kgc_wq, &rrpc->ws_requeue);
840                 break;
841         }
842
843         mempool_free(rqd, rrpc->rq_pool);
844 }
845
846 static void rrpc_requeue(struct work_struct *work)
847 {
848         struct rrpc *rrpc = container_of(work, struct rrpc, ws_requeue);
849         struct bio_list bios;
850         struct bio *bio;
851
852         bio_list_init(&bios);
853
854         spin_lock(&rrpc->bio_lock);
855         bio_list_merge(&bios, &rrpc->requeue_bios);
856         bio_list_init(&rrpc->requeue_bios);
857         spin_unlock(&rrpc->bio_lock);
858
859         while ((bio = bio_list_pop(&bios)))
860                 rrpc_make_rq(rrpc->disk->queue, bio);
861 }
862
863 static void rrpc_gc_free(struct rrpc *rrpc)
864 {
865         struct rrpc_lun *rlun;
866         int i;
867
868         if (rrpc->krqd_wq)
869                 destroy_workqueue(rrpc->krqd_wq);
870
871         if (rrpc->kgc_wq)
872                 destroy_workqueue(rrpc->kgc_wq);
873
874         if (!rrpc->luns)
875                 return;
876
877         for (i = 0; i < rrpc->nr_luns; i++) {
878                 rlun = &rrpc->luns[i];
879
880                 if (!rlun->blocks)
881                         break;
882                 vfree(rlun->blocks);
883         }
884 }
885
886 static int rrpc_gc_init(struct rrpc *rrpc)
887 {
888         rrpc->krqd_wq = alloc_workqueue("rrpc-lun", WQ_MEM_RECLAIM|WQ_UNBOUND,
889                                                                 rrpc->nr_luns);
890         if (!rrpc->krqd_wq)
891                 return -ENOMEM;
892
893         rrpc->kgc_wq = alloc_workqueue("rrpc-bg", WQ_MEM_RECLAIM, 1);
894         if (!rrpc->kgc_wq)
895                 return -ENOMEM;
896
897         setup_timer(&rrpc->gc_timer, rrpc_gc_timer, (unsigned long)rrpc);
898
899         return 0;
900 }
901
902 static void rrpc_map_free(struct rrpc *rrpc)
903 {
904         vfree(rrpc->rev_trans_map);
905         vfree(rrpc->trans_map);
906 }
907
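/* Callback for dev->ops->get_l2p_tbl(): populate the forward and reverse
 * translation maps from @nlb L2P entries starting at @slba. Zero and
 * unmapped (U64_MAX) entries are skipped; other out-of-range physical
 * addresses are rejected.
 */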
908 static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
909 {
910         struct rrpc *rrpc = (struct rrpc *)private;
911         struct nvm_dev *dev = rrpc->dev;
912         struct rrpc_addr *addr = rrpc->trans_map + slba;
913         struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
914         sector_t max_pages = dev->total_pages * (dev->sec_size >> 9);
915         u64 elba = slba + nlb;
916         u64 i;
917
918         if (unlikely(elba > dev->total_pages)) {
919                 pr_err("nvm: L2P data from device is out of bounds!\n");
920                 return -EINVAL;
921         }
922
923         for (i = 0; i < nlb; i++) {
924                 u64 pba = le64_to_cpu(entries[i]);
925                 /* LightNVM treats the address spaces as silos: the LBA and
926                  * PBA spaces are equally large and zero-indexed.
927                  */
928                 if (unlikely(pba >= max_pages && pba != U64_MAX)) {
929                         pr_err("nvm: L2P data entry is out of bounds!\n");
930                         return -EINVAL;
931                 }
932
933                 /* Address zero is special: the first page on a disk is
934                  * protected, as it often holds internal device boot
935                  * information. U64_MAX marks an unmapped entry; skip both.
936                  */
937                 if (!pba || pba == U64_MAX)
938                         continue;
939
940                 addr[i].addr = pba;
941                 raddr[pba].addr = slba + i;
942         }
943
944         return 0;
945 }
946
947 static int rrpc_map_init(struct rrpc *rrpc)
948 {
949         struct nvm_dev *dev = rrpc->dev;
950         sector_t i;
951         int ret;
952
953         rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_pages);
954         if (!rrpc->trans_map)
955                 return -ENOMEM;
956
957         rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
958                                                         * rrpc->nr_pages);
959         if (!rrpc->rev_trans_map)
960                 return -ENOMEM;
961
962         for (i = 0; i < rrpc->nr_pages; i++) {
963                 struct rrpc_addr *p = &rrpc->trans_map[i];
964                 struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];
965
966                 p->addr = ADDR_EMPTY;
967                 r->addr = ADDR_EMPTY;
968         }
969
970         if (!dev->ops->get_l2p_tbl)
971                 return 0;
972
973         /* Bring up the mapping table from device */
974         ret = dev->ops->get_l2p_tbl(dev->q, 0, dev->total_pages,
975                                                         rrpc_l2p_update, rrpc);
976         if (ret) {
977                 pr_err("nvm: rrpc: could not read L2P table.\n");
978                 return -EINVAL;
979         }
980
981         return 0;
982 }
983
984
985 /* Minimum pages needed within a lun */
986 #define PAGE_POOL_SIZE 16
987 #define ADDR_POOL_SIZE 64
988
989 static int rrpc_core_init(struct rrpc *rrpc)
990 {
991         down_write(&rrpc_lock);
992         if (!rrpc_gcb_cache) {
993                 rrpc_gcb_cache = kmem_cache_create("rrpc_gcb",
994                                 sizeof(struct rrpc_block_gc), 0, 0, NULL);
995                 if (!rrpc_gcb_cache) {
996                         up_write(&rrpc_lock);
997                         return -ENOMEM;
998                 }
999
1000                 rrpc_rq_cache = kmem_cache_create("rrpc_rq",
1001                                 sizeof(struct nvm_rq) + sizeof(struct rrpc_rq),
1002                                 0, 0, NULL);
1003                 if (!rrpc_rq_cache) {
1004                         kmem_cache_destroy(rrpc_gcb_cache);
1005                         up_write(&rrpc_lock);
1006                         return -ENOMEM;
1007                 }
1008         }
1009         up_write(&rrpc_lock);
1010
1011         rrpc->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
1012         if (!rrpc->page_pool)
1013                 return -ENOMEM;
1014
1015         rrpc->gcb_pool = mempool_create_slab_pool(rrpc->dev->nr_luns,
1016                                                                 rrpc_gcb_cache);
1017         if (!rrpc->gcb_pool)
1018                 return -ENOMEM;
1019
1020         rrpc->rq_pool = mempool_create_slab_pool(64, rrpc_rq_cache);
1021         if (!rrpc->rq_pool)
1022                 return -ENOMEM;
1023
1024         spin_lock_init(&rrpc->inflights.lock);
1025         INIT_LIST_HEAD(&rrpc->inflights.reqs);
1026
1027         return 0;
1028 }
1029
1030 static void rrpc_core_free(struct rrpc *rrpc)
1031 {
1032         mempool_destroy(rrpc->page_pool);
1033         mempool_destroy(rrpc->gcb_pool);
1034         mempool_destroy(rrpc->rq_pool);
1035 }
1036
1037 static void rrpc_luns_free(struct rrpc *rrpc)
1038 {
1039         kfree(rrpc->luns);
1040 }
1041
1042 static int rrpc_luns_init(struct rrpc *rrpc, int lun_begin, int lun_end)
1043 {
1044         struct nvm_dev *dev = rrpc->dev;
1045         struct rrpc_lun *rlun;
1046         int i, j;
1047
1048         spin_lock_init(&rrpc->rev_lock);
1049
1050         rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
1051                                                                 GFP_KERNEL);
1052         if (!rrpc->luns)
1053                 return -ENOMEM;
1054
1055         /* 1:1 mapping */
1056         for (i = 0; i < rrpc->nr_luns; i++) {
1057                 struct nvm_lun *lun = dev->mt->get_lun(dev, lun_begin + i);
1058
1059                 if (dev->pgs_per_blk >
1060                                 MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
1061                         pr_err("rrpc: number of pages per block too high.\n");
1062                         goto err;
1063                 }
1064
1065                 rlun = &rrpc->luns[i];
1066                 rlun->rrpc = rrpc;
1067                 rlun->parent = lun;
1068                 INIT_LIST_HEAD(&rlun->prio_list);
1069                 INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
1070                 spin_lock_init(&rlun->lock);
1071
1072                 rrpc->total_blocks += dev->blks_per_lun;
1073                 rrpc->nr_pages += dev->sec_per_lun;
1074
1075                 rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
1076                                                 rrpc->dev->blks_per_lun);
1077                 if (!rlun->blocks)
1078                         goto err;
1079
1080                 for (j = 0; j < rrpc->dev->blks_per_lun; j++) {
1081                         struct rrpc_block *rblk = &rlun->blocks[j];
1082                         struct nvm_block *blk = &lun->blocks[j];
1083
1084                         rblk->parent = blk;
1085                         INIT_LIST_HEAD(&rblk->prio);
1086                         spin_lock_init(&rblk->lock);
1087                 }
1088         }
1089
1090         return 0;
1091 err:
1092         return -ENOMEM;
1093 }
1094
1095 static void rrpc_free(struct rrpc *rrpc)
1096 {
1097         rrpc_gc_free(rrpc);
1098         rrpc_map_free(rrpc);
1099         rrpc_core_free(rrpc);
1100         rrpc_luns_free(rrpc);
1101
1102         kfree(rrpc);
1103 }
1104
1105 static void rrpc_exit(void *private)
1106 {
1107         struct rrpc *rrpc = private;
1108
1109         del_timer(&rrpc->gc_timer);
1110
1111         flush_workqueue(rrpc->krqd_wq);
1112         flush_workqueue(rrpc->kgc_wq);
1113
1114         rrpc_free(rrpc);
1115 }
1116
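/* Report the capacity exposed to the block layer: reserve the per-LUN
 * write and GC blocks and expose 90% of the remaining pages, keeping the
 * rest as over-provisioning for garbage collection.
 */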
1117 static sector_t rrpc_capacity(void *private)
1118 {
1119         struct rrpc *rrpc = private;
1120         struct nvm_dev *dev = rrpc->dev;
1121         sector_t reserved, provisioned;
1122
1123         /* cur, gc, and two emergency blocks for each lun */
1124         reserved = rrpc->nr_luns * dev->max_pages_per_blk * 4;
1125         provisioned = rrpc->nr_pages - reserved;
1126
1127         if (reserved > rrpc->nr_pages) {
1128                 pr_err("rrpc: not enough space available to expose storage.\n");
1129                 return 0;
1130         }
1131
1132         sector_div(provisioned, 10);
1133         return provisioned * 9 * NR_PHY_IN_LOG;
1134 }
1135
1136 /*
1137  * Looks up the logical address of each page from the reverse translation map
1138  * and checks whether it is still valid by comparing the logical-to-physical
1139  * mapping against the physical address; stale pages are marked invalid.
1140  */
1141 static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
1142 {
1143         struct nvm_dev *dev = rrpc->dev;
1144         int offset;
1145         struct rrpc_addr *laddr;
1146         u64 paddr, pladdr;
1147
1148         for (offset = 0; offset < dev->pgs_per_blk; offset++) {
1149                 paddr = block_to_addr(rrpc, rblk) + offset;
1150
1151                 pladdr = rrpc->rev_trans_map[paddr].addr;
1152                 if (pladdr == ADDR_EMPTY)
1153                         continue;
1154
1155                 laddr = &rrpc->trans_map[pladdr];
1156
1157                 if (paddr == laddr->addr) {
1158                         laddr->rblk = rblk;
1159                 } else {
1160                         set_bit(offset, rblk->invalid_pages);
1161                         rblk->nr_invalid_pages++;
1162                 }
1163         }
1164 }
1165
1166 static int rrpc_blocks_init(struct rrpc *rrpc)
1167 {
1168         struct rrpc_lun *rlun;
1169         struct rrpc_block *rblk;
1170         int lun_iter, blk_iter;
1171
1172         for (lun_iter = 0; lun_iter < rrpc->nr_luns; lun_iter++) {
1173                 rlun = &rrpc->luns[lun_iter];
1174
1175                 for (blk_iter = 0; blk_iter < rrpc->dev->blks_per_lun;
1176                                                                 blk_iter++) {
1177                         rblk = &rlun->blocks[blk_iter];
1178                         rrpc_block_map_update(rrpc, rblk);
1179                 }
1180         }
1181
1182         return 0;
1183 }
1184
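/* Reserve an initial current block and an emergency GC block for every LUN.
 * Fails if the device cannot provide two free blocks per LUN.
 */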
1185 static int rrpc_luns_configure(struct rrpc *rrpc)
1186 {
1187         struct rrpc_lun *rlun;
1188         struct rrpc_block *rblk;
1189         int i;
1190
1191         for (i = 0; i < rrpc->nr_luns; i++) {
1192                 rlun = &rrpc->luns[i];
1193
1194                 rblk = rrpc_get_blk(rrpc, rlun, 0);
1195                 if (!rblk)
1196                         return -EINVAL;
1197
1198                 rrpc_set_lun_cur(rlun, rblk);
1199
1200                 /* Emergency gc block */
1201                 rblk = rrpc_get_blk(rrpc, rlun, 1);
1202                 if (!rblk)
1203                         return -EINVAL;
1204                 rlun->gc_cur = rblk;
1205         }
1206
1207         return 0;
1208 }
1209
1210 static struct nvm_tgt_type tt_rrpc;
1211
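/* Target constructor, called by the LightNVM core when an rrpc target is
 * created on LUNs [lun_begin, lun_end] of @dev. Builds the translation maps,
 * per-LUN state and GC machinery, then arms the GC timer.
 */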
1212 static void *rrpc_init(struct nvm_dev *dev, struct gendisk *tdisk,
1213                                                 int lun_begin, int lun_end)
1214 {
1215         struct request_queue *bqueue = dev->q;
1216         struct request_queue *tqueue = tdisk->queue;
1217         struct rrpc *rrpc;
1218         int ret;
1219
1220         if (!(dev->identity.dom & NVM_RSP_L2P)) {
1221                 pr_err("nvm: rrpc: device does not support l2p (%x)\n",
1222                                                         dev->identity.dom);
1223                 return ERR_PTR(-EINVAL);
1224         }
1225
1226         rrpc = kzalloc(sizeof(struct rrpc), GFP_KERNEL);
1227         if (!rrpc)
1228                 return ERR_PTR(-ENOMEM);
1229
1230         rrpc->instance.tt = &tt_rrpc;
1231         rrpc->dev = dev;
1232         rrpc->disk = tdisk;
1233
1234         bio_list_init(&rrpc->requeue_bios);
1235         spin_lock_init(&rrpc->bio_lock);
1236         INIT_WORK(&rrpc->ws_requeue, rrpc_requeue);
1237
1238         rrpc->nr_luns = lun_end - lun_begin + 1;
1239
1240         /* simple round-robin strategy */
1241         atomic_set(&rrpc->next_lun, -1);
1242
1243         ret = rrpc_luns_init(rrpc, lun_begin, lun_end);
1244         if (ret) {
1245                 pr_err("nvm: rrpc: could not initialize luns\n");
1246                 goto err;
1247         }
1248
1249         rrpc->poffset = dev->sec_per_lun * lun_begin;
1250         rrpc->lun_offset = lun_begin;
1251
1252         ret = rrpc_core_init(rrpc);
1253         if (ret) {
1254                 pr_err("nvm: rrpc: could not initialize core\n");
1255                 goto err;
1256         }
1257
1258         ret = rrpc_map_init(rrpc);
1259         if (ret) {
1260                 pr_err("nvm: rrpc: could not initialize maps\n");
1261                 goto err;
1262         }
1263
1264         ret = rrpc_blocks_init(rrpc);
1265         if (ret) {
1266                 pr_err("nvm: rrpc: could not initialize state for blocks\n");
1267                 goto err;
1268         }
1269
1270         ret = rrpc_luns_configure(rrpc);
1271         if (ret) {
1272                 pr_err("nvm: rrpc: not enough blocks available in LUNs.\n");
1273                 goto err;
1274         }
1275
1276         ret = rrpc_gc_init(rrpc);
1277         if (ret) {
1278                 pr_err("nvm: rrpc: could not initialize gc\n");
1279                 goto err;
1280         }
1281
1282         /* inherit the size from the underlying device */
1283         blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
1284         blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));
1285
1286         pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n",
1287                         rrpc->nr_luns, (unsigned long long)rrpc->nr_pages);
1288
1289         mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10));
1290
1291         return rrpc;
1292 err:
1293         rrpc_free(rrpc);
1294         return ERR_PTR(ret);
1295 }
1296
1297 /* round robin, page-based FTL, and cost-based GC */
1298 static struct nvm_tgt_type tt_rrpc = {
1299         .name           = "rrpc",
1300         .version        = {1, 0, 0},
1301
1302         .make_rq        = rrpc_make_rq,
1303         .capacity       = rrpc_capacity,
1304         .end_io         = rrpc_end_io,
1305
1306         .init           = rrpc_init,
1307         .exit           = rrpc_exit,
1308 };
1309
1310 static int __init rrpc_module_init(void)
1311 {
1312         return nvm_register_target(&tt_rrpc);
1313 }
1314
1315 static void rrpc_module_exit(void)
1316 {
1317         nvm_unregister_target(&tt_rrpc);
1318 }
1319
1320 module_init(rrpc_module_init);
1321 module_exit(rrpc_module_exit);
1322 MODULE_LICENSE("GPL v2");
1323 MODULE_DESCRIPTION("Block-Device Target for Open-Channel SSDs");