[S390] qdio: 2nd stage retry on SIGA-W busy conditions
authorJan Glauber <jang@linux.vnet.ibm.com>
Wed, 3 Aug 2011 14:44:17 +0000 (16:44 +0200)
committerHeiko Carstens <heiko.carstens@de.ibm.com>
Wed, 3 Aug 2011 14:44:19 +0000 (16:44 +0200)
The SIGA-W may return with the busy bit set which means the device was
blocked. The busy loop which retries the SIGA-W for 100us may not be
long enough when running under a heavily loaded hypervisor.

Extend the retry mechanism by adding a longer second stage which retries
the SIGA-W for up to 10s. In difference to the first retry loop the second
stage is using mdelay to stop the cpu between the retries and thereby
avoid additional preassure in on the hypervisor.
If the second stage retry is successfull a device reset is avoided.

Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
drivers/s390/cio/qdio.h
drivers/s390/cio/qdio_main.c

index 7bc643f..e5c9664 100644 (file)
@@ -14,6 +14,8 @@
 #include "chsc.h"
 
 #define QDIO_BUSY_BIT_PATIENCE         (100 << 12)     /* 100 microseconds */
+#define QDIO_BUSY_BIT_RETRY_DELAY      10              /* 10 milliseconds */
+#define QDIO_BUSY_BIT_RETRIES          1000            /* = 10s retry time */
 #define QDIO_INPUT_THRESHOLD           (500 << 12)     /* 500 microseconds */
 
 /*
index e58169c..288c914 100644 (file)
@@ -313,7 +313,7 @@ static int qdio_siga_output(struct qdio_q *q, unsigned int *busy_bit)
        unsigned long schid = *((u32 *) &q->irq_ptr->schid);
        unsigned int fc = QDIO_SIGA_WRITE;
        u64 start_time = 0;
-       int cc;
+       int retries = 0, cc;
 
        if (is_qebsm(q)) {
                schid = q->irq_ptr->sch_token;
@@ -325,6 +325,7 @@ again:
        /* hipersocket busy condition */
        if (unlikely(*busy_bit)) {
                WARN_ON(queue_type(q) != QDIO_IQDIO_QFMT || cc != 2);
+               retries++;
 
                if (!start_time) {
                        start_time = get_clock();
@@ -333,6 +334,11 @@ again:
                if ((get_clock() - start_time) < QDIO_BUSY_BIT_PATIENCE)
                        goto again;
        }
+       if (retries) {
+               DBF_DEV_EVENT(DBF_WARN, q->irq_ptr,
+                             "%4x cc2 BB1:%1d", SCH_NO(q), q->nr);
+               DBF_DEV_EVENT(DBF_WARN, q->irq_ptr, "count:%u", retries);
+       }
        return cc;
 }
 
@@ -728,13 +734,14 @@ static inline int qdio_outbound_q_moved(struct qdio_q *q)
 
 static int qdio_kick_outbound_q(struct qdio_q *q)
 {
+       int retries = 0, cc;
        unsigned int busy_bit;
-       int cc;
 
        if (!need_siga_out(q))
                return 0;
 
        DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w:%1d", q->nr);
+retry:
        qperf_inc(q, siga_write);
 
        cc = qdio_siga_output(q, &busy_bit);
@@ -743,7 +750,11 @@ static int qdio_kick_outbound_q(struct qdio_q *q)
                break;
        case 2:
                if (busy_bit) {
-                       DBF_ERROR("%4x cc2 REP:%1d", SCH_NO(q), q->nr);
+                       while (++retries < QDIO_BUSY_BIT_RETRIES) {
+                               mdelay(QDIO_BUSY_BIT_RETRY_DELAY);
+                               goto retry;
+                       }
+                       DBF_ERROR("%4x cc2 BBC:%1d", SCH_NO(q), q->nr);
                        cc |= QDIO_ERROR_SIGA_BUSY;
                } else
                        DBF_DEV_EVENT(DBF_INFO, q->irq_ptr, "siga-w cc2:%1d", q->nr);
@@ -753,6 +764,10 @@ static int qdio_kick_outbound_q(struct qdio_q *q)
                DBF_ERROR("%4x SIGA-W:%1d", SCH_NO(q), cc);
                break;
        }
+       if (retries) {
+               DBF_ERROR("%4x cc2 BB2:%1d", SCH_NO(q), q->nr);
+               DBF_ERROR("count:%u", retries);
+       }
        return cc;
 }