ntb_perf: Improve thread handling to increase robustness
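
The diff below combines several changes visible in the hunks that follow: an optional max_mw_size module parameter that caps the advertised memory-window size, rescheduling based on elapsed time (every 5 seconds of jiffies) instead of every 4GB copied, kthread_should_stop() checks so worker threads can be cancelled cleanly, removal of the unused SPAD_MSG/SPAD_ACK scratchpads plus a scratchpad-count check in perf_probe(), and a run_mutex together with a tdone counter and wait queue that replace the old run flag, so debugfs_run_write() now returns -ENOLINK, -EINVAL or -EBUSY rather than silently accepting a second run. As a rough, hedged sketch of the new run gating only (the example_* names below are hypothetical and not part of the patch; this is not the driver code itself):

	#include <linux/atomic.h>
	#include <linux/mutex.h>
	#include <linux/wait.h>

	/* Illustrative sketch of the mutex_trylock()/wait-queue pattern. */
	static DEFINE_MUTEX(example_run_mutex);
	static atomic_t example_tdone = ATOMIC_INIT(0);

	static ssize_t example_run_write(size_t count, unsigned int nthreads)
	{
		DECLARE_WAIT_QUEUE_HEAD(wq);

		/* Only one test run at a time; a concurrent writer backs off. */
		if (!mutex_trylock(&example_run_mutex))
			return -EBUSY;

		atomic_set(&example_tdone, 0);

		/*
		 * ... start nthreads workers here; each worker calls
		 * atomic_inc(&example_tdone) and wake_up(&wq) when its
		 * measurement pass completes, then parks until stopped ...
		 */

		/* Sleep until every worker has reported completion. */
		wait_event_interruptible(wq,
			atomic_read(&example_tdone) == nthreads);

		/* ... kthread_stop() each worker, then release the gate ... */
		mutex_unlock(&example_run_mutex);
		return count;
	}
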
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 8dfce9c..db4dc61 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -58,6 +58,7 @@
 #include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/ntb.h>
+#include <linux/mutex.h>
 
 #define DRIVER_NAME            "ntb_perf"
 #define DRIVER_DESCRIPTION     "PCIe NTB Performance Measurement Tool"
@@ -83,6 +84,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
 
 static struct dentry *perf_debugfs_dir;
 
+static unsigned long max_mw_size;
+module_param(max_mw_size, ulong, 0644);
+MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
+
 static unsigned int seg_order = 19; /* 512K */
 module_param(seg_order, uint, 0644);
 MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
@@ -117,6 +122,7 @@ struct pthr_ctx {
        int                     dma_prep_err;
        int                     src_idx;
        void                    *srcs[MAX_SRCS];
+       wait_queue_head_t       *wq;
 };
 
 struct perf_ctx {
@@ -130,17 +136,17 @@ struct perf_ctx {
        struct dentry           *debugfs_run;
        struct dentry           *debugfs_threads;
        u8                      perf_threads;
-       bool                    run;
+       /* mutex ensures only one set of threads run at once */
+       struct mutex            run_mutex;
        struct pthr_ctx         pthr_ctx[MAX_THREADS];
        atomic_t                tsync;
+       atomic_t                tdone;
 };
 
 enum {
        VERSION = 0,
        MW_SZ_HIGH,
        MW_SZ_LOW,
-       SPAD_MSG,
-       SPAD_ACK,
        MAX_SPAD
 };
 
@@ -271,6 +277,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
        char __iomem *tmp = dst;
        u64 perf, diff_us;
        ktime_t kstart, kstop, kdiff;
+       unsigned long last_sleep = jiffies;
 
        chunks = div64_u64(win_size, buf_size);
        total_chunks = div64_u64(total, buf_size);
@@ -286,17 +293,24 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src,
                } else
                        tmp += buf_size;
 
-               /* Probably should schedule every 4GB to prevent soft hang. */
-               if (((copied % SZ_4G) == 0) && !use_dma) {
+               /* Probably should schedule every 5s to prevent soft hang. */
+               if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
+                       last_sleep = jiffies;
                        set_current_state(TASK_INTERRUPTIBLE);
                        schedule_timeout(1);
                }
+
+               if (unlikely(kthread_should_stop()))
+                       break;
        }
 
        if (use_dma) {
                pr_info("%s: All DMA descriptors submitted\n", current->comm);
-               while (atomic_read(&pctx->dma_sync) != 0)
+               while (atomic_read(&pctx->dma_sync) != 0) {
+                       if (kthread_should_stop())
+                               break;
                        msleep(20);
+               }
        }
 
        kstop = ktime_get();
@@ -389,7 +403,10 @@ static int ntb_perf_thread(void *data)
                pctx->srcs[i] = NULL;
        }
 
-       return 0;
+       atomic_inc(&perf->tdone);
+       wake_up(pctx->wq);
+       rc = 0;
+       goto done;
 
 err:
        for (i = 0; i < MAX_SRCS; i++) {
@@ -402,6 +419,16 @@ err:
                pctx->dma_chan = NULL;
        }
 
+done:
+       /* Wait until we are told to stop */
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (kthread_should_stop())
+                       break;
+               schedule();
+       }
+       __set_current_state(TASK_RUNNING);
+
        return rc;
 }
 
@@ -472,6 +499,10 @@ static void perf_link_work(struct work_struct *work)
        dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
 
        size = perf->mw.phys_size;
+
+       if (max_mw_size && size > max_mw_size)
+               size = max_mw_size;
+
        ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
        ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
        ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
@@ -545,6 +576,7 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
        struct perf_ctx *perf = filp->private_data;
        char *buf;
        ssize_t ret, out_offset;
+       int running;
 
        if (!perf)
                return 0;
@@ -552,7 +584,9 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
        buf = kmalloc(64, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
-       out_offset = snprintf(buf, 64, "%d\n", perf->run);
+
+       running = mutex_is_locked(&perf->run_mutex);
+       out_offset = snprintf(buf, 64, "%d\n", running);
        ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
        kfree(buf);
 
@@ -564,7 +598,6 @@ static void threads_cleanup(struct perf_ctx *perf)
        struct pthr_ctx *pctx;
        int i;
 
-       perf->run = false;
        for (i = 0; i < MAX_THREADS; i++) {
                pctx = &perf->pthr_ctx[i];
                if (pctx->thread) {
@@ -579,65 +612,66 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
 {
        struct perf_ctx *perf = filp->private_data;
        int node, i;
+       DECLARE_WAIT_QUEUE_HEAD(wq);
 
        if (!perf->link_is_up)
-               return 0;
+               return -ENOLINK;
 
        if (perf->perf_threads == 0)
-               return 0;
+               return -EINVAL;
 
-       if (atomic_read(&perf->tsync) == 0)
-               perf->run = false;
+       if (!mutex_trylock(&perf->run_mutex))
+               return -EBUSY;
 
-       if (perf->run)
-               threads_cleanup(perf);
-       else {
-               perf->run = true;
+       if (perf->perf_threads > MAX_THREADS) {
+               perf->perf_threads = MAX_THREADS;
+               pr_info("Reset total threads to: %u\n", MAX_THREADS);
+       }
 
-               if (perf->perf_threads > MAX_THREADS) {
-                       perf->perf_threads = MAX_THREADS;
-                       pr_info("Reset total threads to: %u\n", MAX_THREADS);
-               }
+       /* no greater than 1M */
+       if (seg_order > MAX_SEG_ORDER) {
+               seg_order = MAX_SEG_ORDER;
+               pr_info("Fix seg_order to %u\n", seg_order);
+       }
 
-               /* no greater than 1M */
-               if (seg_order > MAX_SEG_ORDER) {
-                       seg_order = MAX_SEG_ORDER;
-                       pr_info("Fix seg_order to %u\n", seg_order);
-               }
+       if (run_order < seg_order) {
+               run_order = seg_order;
+               pr_info("Fix run_order to %u\n", run_order);
+       }
 
-               if (run_order < seg_order) {
-                       run_order = seg_order;
-                       pr_info("Fix run_order to %u\n", run_order);
-               }
+       node = dev_to_node(&perf->ntb->pdev->dev);
+       atomic_set(&perf->tdone, 0);
 
-               node = dev_to_node(&perf->ntb->pdev->dev);
-               /* launch kernel thread */
-               for (i = 0; i < perf->perf_threads; i++) {
-                       struct pthr_ctx *pctx;
-
-                       pctx = &perf->pthr_ctx[i];
-                       atomic_set(&pctx->dma_sync, 0);
-                       pctx->perf = perf;
-                       pctx->thread =
-                               kthread_create_on_node(ntb_perf_thread,
-                                                      (void *)pctx,
-                                                      node, "ntb_perf %d", i);
-                       if (IS_ERR(pctx->thread)) {
-                               pctx->thread = NULL;
-                               goto err;
-                       } else
-                               wake_up_process(pctx->thread);
-
-                       if (perf->run == false)
-                               return -ENXIO;
-               }
+       /* launch kernel thread */
+       for (i = 0; i < perf->perf_threads; i++) {
+               struct pthr_ctx *pctx;
 
+               pctx = &perf->pthr_ctx[i];
+               atomic_set(&pctx->dma_sync, 0);
+               pctx->perf = perf;
+               pctx->wq = &wq;
+               pctx->thread =
+                       kthread_create_on_node(ntb_perf_thread,
+                                              (void *)pctx,
+                                              node, "ntb_perf %d", i);
+               if (IS_ERR(pctx->thread)) {
+                       pctx->thread = NULL;
+                       goto err;
+               } else {
+                       wake_up_process(pctx->thread);
+               }
        }
 
+       wait_event_interruptible(wq,
+               atomic_read(&perf->tdone) == perf->perf_threads);
+
+       threads_cleanup(perf);
+       mutex_unlock(&perf->run_mutex);
        return count;
 
 err:
        threads_cleanup(perf);
+       mutex_unlock(&perf->run_mutex);
        return -ENXIO;
 }
 
@@ -688,6 +722,12 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
        int node;
        int rc = 0;
 
+       if (ntb_spad_count(ntb) < MAX_SPAD) {
+               dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
+                       DRIVER_NAME);
+               return -EIO;
+       }
+
        node = dev_to_node(&pdev->dev);
 
        perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
@@ -699,7 +739,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
        perf->ntb = ntb;
        perf->perf_threads = 1;
        atomic_set(&perf->tsync, 0);
-       perf->run = false;
+       mutex_init(&perf->run_mutex);
        spin_lock_init(&perf->db_lock);
        perf_setup_mw(ntb, perf);
        INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
@@ -734,6 +774,8 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
 
        dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
 
+       mutex_lock(&perf->run_mutex);
+
        cancel_delayed_work_sync(&perf->link_work);
        cancel_work_sync(&perf->link_cleanup);