hpsa: clean up aborts
[cascardo/linux.git] / drivers / scsi / hpsa.c
index 82390ad..ae9d9e1 100644 (file)
@@ -253,6 +253,8 @@ static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h,
        struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
        u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk);
 static void hpsa_command_resubmit_worker(struct work_struct *work);
+static u32 lockup_detected(struct ctlr_info *h);
+static int detect_controller_lockup(struct ctlr_info *h);
 
 static inline struct ctlr_info *sdev_to_hba(struct scsi_device *sdev)
 {
@@ -426,7 +428,7 @@ static ssize_t host_show_hp_ssd_smart_path_status(struct device *dev,
 /* List of controllers which cannot be hard reset on kexec with reset_devices */
 static u32 unresettable_controller[] = {
        0x324a103C, /* Smart Array P712m */
-       0x324b103C, /* SmartArray P711m */
+       0x324b103C, /* Smart Array P711m */
        0x3223103C, /* Smart Array P800 */
        0x3234103C, /* Smart Array P400 */
        0x3235103C, /* Smart Array P400i */
@@ -468,24 +470,32 @@ static u32 soft_unresettable_controller[] = {
        0x409D0E11, /* Smart Array 6400 EM */
 };
 
-static int ctlr_is_hard_resettable(u32 board_id)
+static u32 needs_abort_tags_swizzled[] = {
+       0x323D103C, /* Smart Array P700m */
+       0x324a103C, /* Smart Array P712m */
+       0x324b103C, /* Smart Array P711m */
+};
+
+static int board_id_in_array(u32 a[], int nelems, u32 board_id)
 {
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
-               if (unresettable_controller[i] == board_id)
-                       return 0;
-       return 1;
+       for (i = 0; i < nelems; i++)
+               if (a[i] == board_id)
+                       return 1;
+       return 0;
 }
 
-static int ctlr_is_soft_resettable(u32 board_id)
+static int ctlr_is_hard_resettable(u32 board_id)
 {
-       int i;
+       return !board_id_in_array(unresettable_controller,
+                       ARRAY_SIZE(unresettable_controller), board_id);
+}
 
-       for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
-               if (soft_unresettable_controller[i] == board_id)
-                       return 0;
-       return 1;
+static int ctlr_is_soft_resettable(u32 board_id)
+{
+       return !board_id_in_array(soft_unresettable_controller,
+                       ARRAY_SIZE(soft_unresettable_controller), board_id);
 }
 
 static int ctlr_is_resettable(u32 board_id)
@@ -494,6 +504,12 @@ static int ctlr_is_resettable(u32 board_id)
                ctlr_is_soft_resettable(board_id);
 }
 
+static int ctlr_needs_abort_tags_swizzled(u32 board_id)
+{
+       return board_id_in_array(needs_abort_tags_swizzled,
+                       ARRAY_SIZE(needs_abort_tags_swizzled), board_id);
+}
+
 static ssize_t host_show_resettable(struct device *dev,
        struct device_attribute *attr, char *buf)
 {
@@ -748,30 +764,43 @@ static inline u32 next_command(struct ctlr_info *h, u8 q)
  * a separate special register for submitting commands.
  */
 
-/* set_performant_mode: Modify the tag for cciss performant
+/*
+ * set_performant_mode: Modify the tag for cciss performant
  * set bit 0 for pull model, bits 3-1 for block fetch
  * register number
  */
-static void set_performant_mode(struct ctlr_info *h, struct CommandList *c)
+#define DEFAULT_REPLY_QUEUE (-1)
+static void set_performant_mode(struct ctlr_info *h, struct CommandList *c,
+                                       int reply_queue)
 {
        if (likely(h->transMethod & CFGTBL_Trans_Performant)) {
                c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
-               if (likely(h->msix_vector > 0))
+               if (unlikely(!h->msix_vector))
+                       return;
+               if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
                        c->Header.ReplyQueue =
                                raw_smp_processor_id() % h->nreply_queues;
+               else
+                       c->Header.ReplyQueue = reply_queue % h->nreply_queues;
        }
 }
 
 static void set_ioaccel1_performant_mode(struct ctlr_info *h,
-                                               struct CommandList *c)
+                                               struct CommandList *c,
+                                               int reply_queue)
 {
        struct io_accel1_cmd *cp = &h->ioaccel_cmd_pool[c->cmdindex];
 
-       /* Tell the controller to post the reply to the queue for this
+       /*
+        * Tell the controller to post the reply to the queue for this
         * processor.  This seems to give the best I/O throughput.
         */
-       cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
-       /* Set the bits in the address sent down to include:
+       if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+               cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
+       else
+               cp->ReplyQueue = reply_queue % h->nreply_queues;
+       /*
+        * Set the bits in the address sent down to include:
         *  - performant mode bit (bit 0)
         *  - pull count (bits 1-3)
         *  - command type (bits 4-6)
@@ -781,15 +810,21 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h,
 }
 
 static void set_ioaccel2_performant_mode(struct ctlr_info *h,
-                                               struct CommandList *c)
+                                               struct CommandList *c,
+                                               int reply_queue)
 {
        struct io_accel2_cmd *cp = &h->ioaccel2_cmd_pool[c->cmdindex];
 
-       /* Tell the controller to post the reply to the queue for this
+       /*
+        * Tell the controller to post the reply to the queue for this
         * processor.  This seems to give the best I/O throughput.
         */
-       cp->reply_queue = smp_processor_id() % h->nreply_queues;
-       /* Set the bits in the address sent down to include:
+       if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+               cp->reply_queue = smp_processor_id() % h->nreply_queues;
+       else
+               cp->reply_queue = reply_queue % h->nreply_queues;
+       /*
+        * Set the bits in the address sent down to include:
         *  - performant mode bit not used in ioaccel mode 2
         *  - pull count (bits 0-3)
         *  - command type isn't needed for ioaccel2
@@ -826,26 +861,32 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h,
                h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
 }
 
-static void enqueue_cmd_and_start_io(struct ctlr_info *h,
-       struct CommandList *c)
+static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
+       struct CommandList *c, int reply_queue)
 {
        dial_down_lockup_detection_during_fw_flash(h, c);
        atomic_inc(&h->commands_outstanding);
        switch (c->cmd_type) {
        case CMD_IOACCEL1:
-               set_ioaccel1_performant_mode(h, c);
+               set_ioaccel1_performant_mode(h, c, reply_queue);
                writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
                break;
        case CMD_IOACCEL2:
-               set_ioaccel2_performant_mode(h, c);
+               set_ioaccel2_performant_mode(h, c, reply_queue);
                writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32);
                break;
        default:
-               set_performant_mode(h, c);
+               set_performant_mode(h, c, reply_queue);
                h->access.submit_command(h, c);
        }
 }
 
+/* Submit c to the controller using the default reply queue selection. */
+static void enqueue_cmd_and_start_io(struct ctlr_info *h, struct CommandList *c)
+{
+       __enqueue_cmd_and_start_io(h, c, DEFAULT_REPLY_QUEUE);
+}
+
 static inline int is_hba_lunid(unsigned char scsi3addr[])
 {
        return memcmp(scsi3addr, RAID_CTLR_LUNID, 8) == 0;
@@ -886,6 +927,23 @@ static int hpsa_find_target_lun(struct ctlr_info *h,
        return !found;
 }
 
+/* Log one device-identifying line at the given printk level. */
+static inline void hpsa_show_dev_msg(const char *level, struct ctlr_info *h,
+       struct hpsa_scsi_dev_t *dev, char *description)
+{
+       dev_printk(level, &h->pdev->dev,
+                       "scsi %d:%d:%d:%d: %s %s %.8s %.16s RAID-%s SSDSmartPathCap%c En%c Exp=%d\n",
+                       h->scsi_host->host_no, dev->bus, dev->target, dev->lun,
+                       description, scsi_device_type(dev->devtype),
+                       dev->vendor, dev->model,
+                       /* format already prints "RAID-" */
+                       dev->raid_level > RAID_UNKNOWN ?
+                               "?" : raid_label[dev->raid_level],
+                       dev->offload_config ? '+' : '-',
+                       dev->offload_enabled ? '+' : '-',
+                       dev->expose_state);
+}
+
 /* Add an entry into h->dev[] array. */
 static int hpsa_scsi_add_entry(struct ctlr_info *h, int hostno,
                struct hpsa_scsi_dev_t *device,
@@ -955,15 +1013,8 @@ lun_assigned:
        device->offload_enabled = 0;
        added[*nadded] = device;
        (*nadded)++;
-
-       /* initially, (before registering with scsi layer) we don't
-        * know our hostno and we don't want to print anything first
-        * time anyway (the scsi layer's inquiries will show that info)
-        */
-       /* if (hostno != -1) */
-               dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
-                       scsi_device_type(device->devtype), hostno,
-                       device->bus, device->target, device->lun);
+       hpsa_show_dev_msg(KERN_INFO, h, device,
+               device->expose_state & HPSA_SCSI_ADD ? "added" : "masked");
        return 0;
 }
 
@@ -1003,6 +1054,7 @@ static void hpsa_scsi_update_entry(struct ctlr_info *h, int hostno,
        if (!new_entry->offload_enabled)
                h->dev[entry]->offload_enabled = 0;
 
+       hpsa_show_dev_msg(KERN_INFO, h, h->dev[entry], "updated");
 }
 
 /* Replace an entry from h->dev[] array. */
@@ -1030,9 +1082,7 @@ static void hpsa_scsi_replace_entry(struct ctlr_info *h, int hostno,
        h->dev[entry] = new_entry;
        added[*nadded] = new_entry;
        (*nadded)++;
-       dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d changed.\n",
-               scsi_device_type(new_entry->devtype), hostno, new_entry->bus,
-                       new_entry->target, new_entry->lun);
+       hpsa_show_dev_msg(KERN_INFO, h, new_entry, "replaced");
 }
 
 /* Remove an entry from h->dev[] array. */
@@ -1052,9 +1102,7 @@ static void hpsa_scsi_remove_entry(struct ctlr_info *h, int hostno, int entry,
        for (i = entry; i < h->ndevices-1; i++)
                h->dev[i] = h->dev[i+1];
        h->ndevices--;
-       dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d removed.\n",
-               scsi_device_type(sd->devtype), hostno, sd->bus, sd->target,
-               sd->lun);
+       hpsa_show_dev_msg(KERN_INFO, h, sd, "removed");
 }
 
 #define SCSI3ADDR_EQ(a, b) ( \
@@ -1435,9 +1483,7 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
                 */
                if (sd[i]->volume_offline) {
                        hpsa_show_volume_status(h, sd[i]);
-                       dev_info(&h->pdev->dev, "c%db%dt%dl%d: temporarily offline\n",
-                               h->scsi_host->host_no,
-                               sd[i]->bus, sd[i]->target, sd[i]->lun);
+                       hpsa_show_dev_msg(KERN_INFO, h, sd[i], "offline");
                        continue;
                }
 
@@ -1501,10 +1547,8 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
                                 * future cmds to this device will get selection
                                 * timeout as if the device was gone.
                                 */
-                               dev_warn(&h->pdev->dev,
-                                       "didn't find c%db%dt%dl%d for removal.\n",
-                                       hostno, removed[i]->bus,
-                                       removed[i]->target, removed[i]->lun);
+                               hpsa_show_dev_msg(KERN_WARNING, h, removed[i],
+                                       "didn't find device for removal.");
                        }
                }
                kfree(removed[i]);
@@ -1518,9 +1562,8 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
                if (scsi_add_device(sh, added[i]->bus,
                        added[i]->target, added[i]->lun) == 0)
                        continue;
-               dev_warn(&h->pdev->dev, "scsi_add_device c%db%dt%dl%d failed, "
-                       "device not added.\n", hostno, added[i]->bus,
-                       added[i]->target, added[i]->lun);
+               hpsa_show_dev_msg(KERN_WARNING, h, added[i],
+                                       "addition failed, device not added.");
                /* now we have to remove it from h->dev,
                 * since it didn't get added to scsi mid layer
                 */
@@ -1840,6 +1883,19 @@ static void complete_scsi_command(struct CommandList *cp)
        if (cp->cmd_type == CMD_IOACCEL2 || cp->cmd_type == CMD_IOACCEL1)
                atomic_dec(&cp->phys_disk->ioaccel_cmds_out);
 
+       /*
+        * We check for lockup status here as it may be set for
+        * CMD_SCSI, CMD_IOACCEL1 and CMD_IOACCEL2 commands by
+        * fail_all_outstanding_cmds()
+        */
+       if (unlikely(ei->CommandStatus == CMD_CTLR_LOCKUP)) {
+               /* DID_NO_CONNECT will prevent a retry */
+               cmd->result = DID_NO_CONNECT << 16;
+               cmd_free(h, cp);
+               cmd->scsi_done(cmd);
+               return;
+       }
+
        if (cp->cmd_type == CMD_IOACCEL2)
                return process_ioaccel2_completion(h, cp, cmd, dev);
 
@@ -2054,14 +2110,36 @@ static int hpsa_map_one(struct pci_dev *pdev,
        return 0;
 }
 
-static inline void hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h,
-       struct CommandList *c)
+#define NO_TIMEOUT ((unsigned long) -1)
+#define DEFAULT_TIMEOUT 30000 /* milliseconds */
+static int hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h,
+       struct CommandList *c, int reply_queue, unsigned long timeout_msecs)
 {
        DECLARE_COMPLETION_ONSTACK(wait);
 
        c->waiting = &wait;
-       enqueue_cmd_and_start_io(h, c);
-       wait_for_completion(&wait);
+       __enqueue_cmd_and_start_io(h, c, reply_queue);
+       if (timeout_msecs == NO_TIMEOUT) {
+               /* TODO: get rid of this no-timeout thing */
+               wait_for_completion_io(&wait);
+               return IO_OK;
+       }
+       if (!wait_for_completion_io_timeout(&wait,
+                                       msecs_to_jiffies(timeout_msecs))) {
+               dev_warn(&h->pdev->dev, "Command timed out.\n");
+               return -ETIMEDOUT;
+       }
+       return IO_OK;
+}
+
+static int hpsa_scsi_do_simple_cmd(struct ctlr_info *h, struct CommandList *c,
+                                  int reply_queue, unsigned long timeout_msecs)
+{
+       if (unlikely(lockup_detected(h))) {
+               c->err_info->CommandStatus = CMD_CTLR_LOCKUP;
+               return IO_OK;
+       }
+       return hpsa_scsi_do_simple_cmd_core(h, c, reply_queue, timeout_msecs);
 }
 
 static u32 lockup_detected(struct ctlr_info *h)
@@ -2076,25 +2154,19 @@ static u32 lockup_detected(struct ctlr_info *h)
        return rc;
 }
 
-static void hpsa_scsi_do_simple_cmd_core_if_no_lockup(struct ctlr_info *h,
-       struct CommandList *c)
-{
-       /* If controller lockup detected, fake a hardware error. */
-       if (unlikely(lockup_detected(h)))
-               c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-       else
-               hpsa_scsi_do_simple_cmd_core(h, c);
-}
-
 #define MAX_DRIVER_CMD_RETRIES 25
-static void hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
-       struct CommandList *c, int data_direction)
+static int hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
+       struct CommandList *c, int data_direction, unsigned long timeout_msecs)
 {
        int backoff_time = 10, retry_count = 0;
+       int rc;
 
        do {
                memset(c->err_info, 0, sizeof(*c->err_info));
-               hpsa_scsi_do_simple_cmd_core(h, c);
+               rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE,
+                                                 timeout_msecs);
+               if (rc)
+                       break;
                retry_count++;
                if (retry_count > 3) {
                        msleep(backoff_time);
@@ -2105,6 +2177,9 @@ static void hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
                        check_for_busy(h, c)) &&
                        retry_count <= MAX_DRIVER_CMD_RETRIES);
        hpsa_pci_unmap(h->pdev, c, 1, data_direction);
+       if (retry_count > MAX_DRIVER_CMD_RETRIES)
+               rc = -EIO;
+       return rc;
 }
 
 static void hpsa_print_cmd(struct ctlr_info *h, char *txt,
@@ -2181,6 +2256,9 @@ static void hpsa_scsi_interpret_error(struct ctlr_info *h,
        case CMD_UNABORTABLE:
                hpsa_print_cmd(h, "unabortable", cp);
                break;
+       case CMD_CTLR_LOCKUP:
+               hpsa_print_cmd(h, "controller lockup detected", cp);
+               break;
        default:
                hpsa_print_cmd(h, "unknown status", cp);
                dev_warn(d, "Unknown command status %x\n",
@@ -2208,7 +2286,10 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, unsigned char *scsi3addr,
                rc = -1;
                goto out;
        }
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                                       PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
        ei = c->err_info;
        if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                hpsa_scsi_interpret_error(h, c);
@@ -2238,7 +2319,10 @@ static int hpsa_bmic_ctrl_mode_sense(struct ctlr_info *h,
                rc = -1;
                goto out;
        }
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                       PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
        ei = c->err_info;
        if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                hpsa_scsi_interpret_error(h, c);
@@ -2250,7 +2334,7 @@ out:
        }
 
 static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
-       u8 reset_type)
+       u8 reset_type, int reply_queue)
 {
        int rc = IO_OK;
        struct CommandList *c;
@@ -2267,7 +2351,11 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
        (void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0,
                        scsi3addr, TYPE_MSG);
        c->Request.CDB[1] = reset_type; /* fill_cmd defaults to LUN reset */
-       hpsa_scsi_do_simple_cmd_core(h, c);
+       rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
+       if (rc) {
+               dev_warn(&h->pdev->dev, "Failed to send reset command\n");
+               goto out;
+       }
        /* no unmap needed here because no data xfer. */
 
        ei = c->err_info;
@@ -2275,6 +2363,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
                hpsa_scsi_interpret_error(h, c);
                rc = -1;
        }
+out:
        cmd_free(h, c);
        return rc;
 }
@@ -2392,15 +2481,18 @@ static int hpsa_get_raid_map(struct ctlr_info *h,
                        sizeof(this_device->raid_map), 0,
                        scsi3addr, TYPE_CMD)) {
                dev_warn(&h->pdev->dev, "Out of memory in hpsa_get_raid_map()\n");
-               cmd_free(h, c);
-               return -ENOMEM;
+               rc = -ENOMEM;
+               goto out;
        }
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                                       PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
        ei = c->err_info;
        if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                hpsa_scsi_interpret_error(h, c);
-               cmd_free(h, c);
-               return -1;
+               rc = -1;
+               goto out;
        }
        cmd_free(h, c);
 
@@ -2412,6 +2504,9 @@ static int hpsa_get_raid_map(struct ctlr_info *h,
        }
        hpsa_debug_map_buff(h, rc, &this_device->raid_map);
        return rc;
+out:
+       cmd_free(h, c);
+       return rc;
 }
 
 static int hpsa_bmic_id_physical_device(struct ctlr_info *h,
@@ -2431,7 +2526,8 @@ static int hpsa_bmic_id_physical_device(struct ctlr_info *h,
        c->Request.CDB[2] = bmic_device_index & 0xff;
        c->Request.CDB[9] = (bmic_device_index >> 8) & 0xff;
 
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE,
+                                               NO_TIMEOUT);
        ei = c->err_info;
        if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                hpsa_scsi_interpret_error(h, c);
@@ -2566,7 +2662,10 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
        }
        if (extended_response)
                c->Request.CDB[1] = extended_response;
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                                       PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
        ei = c->err_info;
        if (ei->CommandStatus != 0 &&
            ei->CommandStatus != CMD_DATA_UNDERRUN) {
@@ -2659,7 +2758,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
 {
        struct CommandList *c;
        unsigned char *sense, sense_key, asc, ascq;
-       int ldstat = 0;
+       int rc, ldstat = 0;
        u16 cmd_status;
        u8 scsi_status;
 #define ASC_LUN_NOT_READY 0x04
@@ -2670,7 +2769,11 @@ static int hpsa_volume_offline(struct ctlr_info *h,
        if (!c)
                return 0;
        (void) fill_cmd(c, TEST_UNIT_READY, h, NULL, 0, 0, scsi3addr, TYPE_CMD);
-       hpsa_scsi_do_simple_cmd_core(h, c);
+       rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
+       if (rc) {
+               cmd_free(h, c);
+               return 0;
+       }
        sense = c->err_info->SenseInfo;
        sense_key = sense[2];
        asc = sense[12];
@@ -2714,6 +2817,50 @@ static int hpsa_volume_offline(struct ctlr_info *h,
        return 0;
 }
 
+/*
+ * Find out if a logical device supports aborts by simply trying one.
+ * Smart Array may claim not to support aborts on logical drives, but
+ * if a MSA2000 is connected, the drives on that will be presented
+ * by the Smart Array as logical drives, and aborts may be sent to
+ * those devices successfully.  So the simplest way to find out is
+ * to simply try an abort and see how the device responds.
+ */
+static int hpsa_device_supports_aborts(struct ctlr_info *h,
+                                       unsigned char *scsi3addr)
+{
+       struct CommandList *c;
+       u64 tag = (u64) -1; /* bogus tag */
+       int supported;
+
+       /* Assume that physical devices support aborts */
+       if (!is_logical_dev_addr_mode(scsi3addr))
+               return 1;
+
+       c = cmd_alloc(h);
+       if (!c)
+               return -ENOMEM;
+       (void) fill_cmd(c, HPSA_ABORT_MSG, h, &tag, 0, 0, scsi3addr, TYPE_MSG);
+       /* The outcome is read from c->err_info, not from the return code. */
+       (void) hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
+       /* no unmap needed here because no data xfer. */
+
+       /*
+        * Only CMD_UNABORTABLE and CMD_ABORT_FAILED are reported as support;
+        * every other status, CMD_INVALID included, is reported as none.
+        */
+       switch (c->err_info->CommandStatus) {
+       case CMD_UNABORTABLE:
+       case CMD_ABORT_FAILED:
+               supported = 1;
+               break;
+       default:
+               supported = 0;
+               break;
+       }
+       cmd_free(h, c);
+       return supported;
+}
+
 static int hpsa_update_device_info(struct ctlr_info *h,
        unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device,
        unsigned char *is_OBDR_device)
@@ -2780,7 +2927,6 @@ static int hpsa_update_device_info(struct ctlr_info *h,
                                        strncmp(obdr_sig, OBDR_TAPE_SIG,
                                                OBDR_SIG_LEN) == 0);
        }
-
        kfree(inq_buff);
        return 0;
 
@@ -2789,6 +2935,31 @@ bail_out:
        return 1;
 }
 
+static void hpsa_update_device_supports_aborts(struct ctlr_info *h,
+                       struct hpsa_scsi_dev_t *dev, u8 *scsi3addr)
+{
+       unsigned long flags;
+       int rc, entry, known;
+
+       /*
+        * A device already present in h->dev[] keeps the abort capability
+        * recorded for it; only an unknown device is probed with an abort.
+        */
+       spin_lock_irqsave(&h->devlock, flags);
+       rc = hpsa_scsi_find_entry(dev, h->dev, h->ndevices, &entry);
+       known = (rc == DEVICE_SAME || rc == DEVICE_UPDATED) &&
+               entry >= 0 && entry < h->ndevices;
+       if (known)
+               dev->supports_aborts = h->dev[entry]->supports_aborts;
+       spin_unlock_irqrestore(&h->devlock, flags);
+       if (known)
+               return;
+
+       /* Probe outside the lock; a failed probe counts as "no aborts". */
+       rc = hpsa_device_supports_aborts(h, scsi3addr);
+       dev->supports_aborts = rc < 0 ? 0 : rc;
+}
+
 static unsigned char *ext_target_model[] = {
        "MSA2012",
        "MSA2024",
@@ -2894,6 +3065,7 @@ static int add_ext_target_dev(struct ctlr_info *h,
        (*n_ext_target_devs)++;
        hpsa_set_bus_target_lun(this_device,
                                tmpdevice->bus, tmpdevice->target, 0);
+       hpsa_update_device_supports_aborts(h, this_device, scsi3addr);
        set_bit(tmpdevice->target, lunzerobits);
        return 1;
 }
@@ -3148,6 +3320,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
                                                        &is_OBDR))
                        continue; /* skip it if we can't talk to it. */
                figure_bus_target_lun(h, lunaddrbytes, tmpdevice);
+               hpsa_update_device_supports_aborts(h, tmpdevice, lunaddrbytes);
                this_device = currentsd[ncurrent];
 
                /*
@@ -4003,7 +4176,11 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
                                                dev->phys_disk[map_index]);
 }
 
-/* Submit commands down the "normal" RAID stack path */
+/*
+ * Submit commands down the "normal" RAID stack path
+ * All callers to hpsa_ciss_submit must check lockup_detected
+ * beforehand, before (opt.) and after calling cmd_alloc
+ */
 static int hpsa_ciss_submit(struct ctlr_info *h,
        struct CommandList *c, struct scsi_cmnd *cmd,
        unsigned char scsi3addr[])
@@ -4114,7 +4291,7 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
        memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr));
 
        if (unlikely(lockup_detected(h))) {
-               cmd->result = DID_ERROR << 16;
+               cmd->result = DID_NO_CONNECT << 16;
                cmd->scsi_done(cmd);
                return 0;
        }
@@ -4124,7 +4301,7 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
                return SCSI_MLQUEUE_HOST_BUSY;
        }
        if (unlikely(lockup_detected(h))) {
-               cmd->result = DID_ERROR << 16;
+               cmd->result = DID_NO_CONNECT << 16;
                cmd_free(h, c);
                cmd->scsi_done(cmd);
                return 0;
@@ -4319,7 +4496,10 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h,
                /* Send the Test Unit Ready, fill_cmd can't fail, no mapping */
                (void) fill_cmd(c, TEST_UNIT_READY, h,
                                NULL, 0, 0, lunaddr, TYPE_CMD);
-               hpsa_scsi_do_simple_cmd_core(h, c);
+               rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE,
+                                               NO_TIMEOUT);
+               if (rc)
+                       goto do_it_again;
                /* no unmap needed here because no data xfer. */
 
                if (c->err_info->CommandStatus == CMD_SUCCESS)
@@ -4330,7 +4510,7 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h,
                        (c->err_info->SenseInfo[2] == NO_SENSE ||
                        c->err_info->SenseInfo[2] == UNIT_ATTENTION))
                        break;
-
+do_it_again:
                dev_warn(&h->pdev->dev, "waiting %d secs "
                        "for device to become ready.\n", waittime);
                rc = 1; /* device not ready. */
@@ -4368,14 +4548,36 @@ static int hpsa_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
                        "device lookup failed.\n");
                return FAILED;
        }
-       dev_warn(&h->pdev->dev, "resetting device %d:%d:%d:%d\n",
-               h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
+
+       /* if controller locked up, we can guarantee command won't complete */
+       if (lockup_detected(h)) {
+               dev_warn(&h->pdev->dev,
+                       "scsi %d:%d:%d:%d RESET FAILED, lockup detected\n",
+                       h->scsi_host->host_no, dev->bus, dev->target,
+                       dev->lun);
+               return FAILED;
+       }
+
+       /* this reset request might be the result of a lockup; check */
+       if (detect_controller_lockup(h)) {
+               dev_warn(&h->pdev->dev,
+                        "scsi %d:%d:%d:%d RESET FAILED, new lockup detected\n",
+                        h->scsi_host->host_no, dev->bus, dev->target,
+                        dev->lun);
+               return FAILED;
+       }
+
+       hpsa_show_dev_msg(KERN_WARNING, h, dev, "resetting");
+
        /* send a reset to the SCSI LUN which the command was sent to */
-       rc = hpsa_send_reset(h, dev->scsi3addr, HPSA_RESET_TYPE_LUN);
+       rc = hpsa_send_reset(h, dev->scsi3addr, HPSA_RESET_TYPE_LUN,
+                            DEFAULT_REPLY_QUEUE);
        if (rc == 0 && wait_for_device_to_become_ready(h, dev->scsi3addr) == 0)
                return SUCCESS;
 
-       dev_warn(&h->pdev->dev, "resetting device failed.\n");
+       dev_warn(&h->pdev->dev,
+               "scsi %d:%d:%d:%d reset failed\n",
+               h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
        return FAILED;
 }
 
@@ -4420,7 +4622,7 @@ static void hpsa_get_tag(struct ctlr_info *h,
 }
 
 static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
-       struct CommandList *abort, int swizzle)
+       struct CommandList *abort, int reply_queue)
 {
        int rc = IO_OK;
        struct CommandList *c;
@@ -4434,13 +4636,13 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
        }
 
        /* fill_cmd can't fail here, no buffer to map */
-       (void) fill_cmd(c, HPSA_ABORT_MSG, h, abort,
+       (void) fill_cmd(c, HPSA_ABORT_MSG, h, &abort->Header.tag,
                0, 0, scsi3addr, TYPE_MSG);
-       if (swizzle)
+       if (h->needs_abort_tags_swizzled)
                swizzle_abort_tag(&c->Request.CDB[4]);
-       hpsa_scsi_do_simple_cmd_core(h, c);
+       (void) hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
        hpsa_get_tag(h, abort, &taglower, &tagupper);
-       dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: do_simple_cmd_core completed.\n",
+       dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: do_simple_cmd(abort) completed.\n",
                __func__, tagupper, taglower);
        /* no unmap needed here because no data xfer. */
 
@@ -4472,7 +4674,7 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
  */
 
 static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
-       unsigned char *scsi3addr, struct CommandList *abort)
+       unsigned char *scsi3addr, struct CommandList *abort, int reply_queue)
 {
        int rc = IO_OK;
        struct scsi_cmnd *scmd; /* scsi command within request being aborted */
@@ -4491,8 +4693,9 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
 
        if (h->raid_offload_debug > 0)
                dev_info(&h->pdev->dev,
-                       "Reset as abort: Abort requested on C%d:B%d:T%d:L%d scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+                       "scsi %d:%d:%d:%d %s scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                        h->scsi_host->host_no, dev->bus, dev->target, dev->lun,
+                       "Reset as abort",
                        scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
                        scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]);
 
@@ -4514,7 +4717,7 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
                        "Reset as abort: Resetting physical device at scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                        psa[0], psa[1], psa[2], psa[3],
                        psa[4], psa[5], psa[6], psa[7]);
-       rc = hpsa_send_reset(h, psa, HPSA_RESET_TYPE_TARGET);
+       rc = hpsa_send_reset(h, psa, HPSA_RESET_TYPE_TARGET, reply_queue);
        if (rc != 0) {
                dev_warn(&h->pdev->dev,
                        "Reset as abort: Failed on physical device at scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
@@ -4541,14 +4744,8 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
        return rc; /* success */
 }
 
-/* Some Smart Arrays need the abort tag swizzled, and some don't.  It's hard to
- * tell which kind we're dealing with, so we send the abort both ways.  There
- * shouldn't be any collisions between swizzled and unswizzled tags due to the
- * way we construct our tags but we check anyway in case the assumptions which
- * make this true someday become false.
- */
 static int hpsa_send_abort_both_ways(struct ctlr_info *h,
-       unsigned char *scsi3addr, struct CommandList *abort)
+       unsigned char *scsi3addr, struct CommandList *abort, int reply_queue)
 {
        /* ioccelerator mode 2 commands should be aborted via the
         * accelerated path, since RAID path is unaware of these commands,
@@ -4556,10 +4753,30 @@ static int hpsa_send_abort_both_ways(struct ctlr_info *h,
         * Change abort to physical device reset.
         */
        if (abort->cmd_type == CMD_IOACCEL2)
-               return hpsa_send_reset_as_abort_ioaccel2(h, scsi3addr, abort);
+               return hpsa_send_reset_as_abort_ioaccel2(h, scsi3addr,
+                                                       abort, reply_queue);
+       return hpsa_send_abort(h, scsi3addr, abort, reply_queue);
+}
 
-       return hpsa_send_abort(h, scsi3addr, abort, 0) &&
-                       hpsa_send_abort(h, scsi3addr, abort, 1);
+/* Find which reply queue a command was meant to return on (per cmd_type) */
+static int hpsa_extract_reply_queue(struct ctlr_info *h,
+                                       struct CommandList *c)
+{
+       if (c->cmd_type == CMD_IOACCEL2)
+               return h->ioaccel2_cmd_pool[c->cmdindex].reply_queue;
+       return c->Header.ReplyQueue;
+}
+
+/*
+ * Limit concurrency of abort commands to prevent over-subscription: wait up
+ * to ABORT_CMD_WAIT_MSECS to claim an abort slot; returns nonzero on timeout.
+ */
+static inline int wait_for_available_abort_cmd(struct ctlr_info *h)
+{
+#define ABORT_CMD_WAIT_MSECS 5000
+       return !wait_event_timeout(h->abort_cmd_wait_queue,
+                       atomic_dec_if_positive(&h->abort_cmds_available) >= 0,
+                       msecs_to_jiffies(ABORT_CMD_WAIT_MSECS));
 }
 
 /* Send an abort for the specified command.
@@ -4577,27 +4794,19 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
        char msg[256];          /* For debug messaging. */
        int ml = 0;
        __le32 tagupper, taglower;
-       int refcount;
+       int refcount, reply_queue;
 
-       /* Find the controller of the command to be aborted */
-       h = sdev_to_hba(sc->device);
-       if (WARN(h == NULL,
-                       "ABORT REQUEST FAILED, Controller lookup failed.\n"))
+       if (sc == NULL)
                return FAILED;
 
-       if (lockup_detected(h))
+       if (sc->device == NULL)
                return FAILED;
 
-       /* Check that controller supports some kind of task abort */
-       if (!(HPSATMF_PHYS_TASK_ABORT & h->TMFSupportFlags) &&
-               !(HPSATMF_LOG_TASK_ABORT & h->TMFSupportFlags))
+       /* Find the controller of the command to be aborted */
+       h = sdev_to_hba(sc->device);
+       if (h == NULL)
                return FAILED;
 
-       memset(msg, 0, sizeof(msg));
-       ml += sprintf(msg+ml, "ABORT REQUEST on C%d:B%d:T%d:L%llu ",
-               h->scsi_host->host_no, sc->device->channel,
-               sc->device->id, sc->device->lun);
-
        /* Find the device of the command to be aborted */
        dev = sc->device->hostdata;
        if (!dev) {
@@ -4606,6 +4815,31 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
                return FAILED;
        }
 
+       /* If controller locked up, we can guarantee command won't complete */
+       if (lockup_detected(h)) {
+               hpsa_show_dev_msg(KERN_WARNING, h, dev,
+                                       "ABORT FAILED, lockup detected");
+               return FAILED;
+       }
+
+       /* This is a good time to check if controller lockup has occurred */
+       if (detect_controller_lockup(h)) {
+               hpsa_show_dev_msg(KERN_WARNING, h, dev,
+                                       "ABORT FAILED, new lockup detected");
+               return FAILED;
+       }
+
+       /* Check that controller supports some kind of task abort */
+       if (!(HPSATMF_PHYS_TASK_ABORT & h->TMFSupportFlags) &&
+               !(HPSATMF_LOG_TASK_ABORT & h->TMFSupportFlags))
+               return FAILED;
+
+       memset(msg, 0, sizeof(msg));
+       ml += sprintf(msg+ml, "scsi %d:%d:%d:%llu %s ",
+               h->scsi_host->host_no, sc->device->channel,
+               sc->device->id, sc->device->lun,
+               "Aborting command");
+
        /* Get SCSI command to be aborted */
        abort = (struct CommandList *) sc->host_scribble;
        if (abort == NULL) {
@@ -4617,26 +4851,40 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
                cmd_free(h, abort);
                return SUCCESS;
        }
+
+       /* Don't bother trying the abort if we know it won't work. */
+       if (abort->cmd_type != CMD_IOACCEL2 &&
+               abort->cmd_type != CMD_IOACCEL1 && !dev->supports_aborts) {
+               cmd_free(h, abort);
+               return FAILED;
+       }
+
        hpsa_get_tag(h, abort, &taglower, &tagupper);
+       reply_queue = hpsa_extract_reply_queue(h, abort);
        ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower);
        as  = abort->scsi_cmd;
        if (as != NULL)
                ml += sprintf(msg+ml, "Command:0x%x SN:0x%lx ",
                        as->cmnd[0], as->serial_number);
        dev_dbg(&h->pdev->dev, "%s\n", msg);
-       dev_warn(&h->pdev->dev, "Abort request on C%d:B%d:T%d:L%d\n",
-               h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
+       hpsa_show_dev_msg(KERN_WARNING, h, dev, "Aborting command");
        /*
         * Command is in flight, or possibly already completed
         * by the firmware (but not to the scsi mid layer) but we can't
         * distinguish which.  Send the abort down.
         */
-       rc = hpsa_send_abort_both_ways(h, dev->scsi3addr, abort);
+       if (wait_for_available_abort_cmd(h)) {
+               dev_warn(&h->pdev->dev,
+                       "Timed out waiting for an abort command to become available.\n");
+               cmd_free(h, abort);
+               return FAILED;
+       }
+       rc = hpsa_send_abort_both_ways(h, dev->scsi3addr, abort, reply_queue);
+       atomic_inc(&h->abort_cmds_available);
+       wake_up_all(&h->abort_cmd_wait_queue);
        if (rc != 0) {
-               dev_dbg(&h->pdev->dev, "%s Request FAILED.\n", msg);
-               dev_warn(&h->pdev->dev, "FAILED abort on device C%d:B%d:T%d:L%d\n",
-                       h->scsi_host->host_no,
-                       dev->bus, dev->target, dev->lun);
+               hpsa_show_dev_msg(KERN_WARNING, h, dev,
+                                       "FAILED to abort command");
                cmd_free(h, abort);
                return FAILED;
        }
@@ -4956,10 +5204,14 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
                c->SG[0].Len = cpu_to_le32(iocommand.buf_size);
                c->SG[0].Ext = cpu_to_le32(HPSA_SG_LAST); /* not chaining */
        }
-       hpsa_scsi_do_simple_cmd_core_if_no_lockup(h, c);
+       rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
        if (iocommand.buf_size > 0)
                hpsa_pci_unmap(h->pdev, c, 1, PCI_DMA_BIDIRECTIONAL);
        check_ioctl_unit_attention(h, c);
+       if (rc) {
+               rc = -EIO;
+               goto out;
+       }
 
        /* Copy the error information out */
        memcpy(&iocommand.error_info, c->err_info,
@@ -5086,10 +5338,15 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
                }
                c->SG[--i].Ext = cpu_to_le32(HPSA_SG_LAST);
        }
-       hpsa_scsi_do_simple_cmd_core_if_no_lockup(h, c);
+       status = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
        if (sg_used)
                hpsa_pci_unmap(h->pdev, c, sg_used, PCI_DMA_BIDIRECTIONAL);
        check_ioctl_unit_attention(h, c);
+       if (status) {
+               status = -EIO;
+               goto cleanup0;
+       }
+
        /* Copy the error information out */
        memcpy(&ioc->error_info, c->err_info, sizeof(ioc->error_info));
        if (copy_to_user(argp, ioc, sizeof(*ioc))) {
@@ -5197,7 +5454,7 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
        int cmd_type)
 {
        int pci_dir = XFER_NONE;
-       struct CommandList *a; /* for commands to be aborted */
+       u64 tag; /* for commands to be aborted */
 
        c->cmd_type = CMD_IOCTL_PEND;
        c->Header.ReplyQueue = 0;
@@ -5313,10 +5570,10 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
                        c->Request.CDB[7] = 0x00;
                        break;
                case  HPSA_ABORT_MSG:
-                       a = buff;       /* point to command to be aborted */
+                       memcpy(&tag, buff, sizeof(tag));
                        dev_dbg(&h->pdev->dev,
-                               "Abort Tag:0x%016llx request Tag:0x%016llx",
-                               a->Header.tag, c->Header.tag);
+                               "Abort Tag:0x%016llx using rqst Tag:0x%016llx",
+                               tag, c->Header.tag);
                        c->Request.CDBLen = 16;
                        c->Request.type_attr_dir =
                                        TYPE_ATTR_DIR(cmd_type,
@@ -5327,8 +5584,7 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
                        c->Request.CDB[2] = 0x00; /* reserved */
                        c->Request.CDB[3] = 0x00; /* reserved */
                        /* Tag to abort goes in CDB[4]-CDB[11] */
-                       memcpy(&c->Request.CDB[4], &a->Header.tag,
-                               sizeof(a->Header.tag));
+                       memcpy(&c->Request.CDB[4], &tag, sizeof(tag));
                        c->Request.CDB[12] = 0x00; /* reserved */
                        c->Request.CDB[13] = 0x00; /* reserved */
                        c->Request.CDB[14] = 0x00; /* reserved */
@@ -6227,6 +6483,8 @@ static int hpsa_wait_for_mode_change_ack(struct ctlr_info *h)
         * as we enter this code.)
         */
        for (i = 0; i < MAX_MODE_CHANGE_WAIT; i++) {
+               if (h->remove_in_progress)
+                       goto done;
                spin_lock_irqsave(&h->lock, flags);
                doorbell_value = readl(h->vaddr + SA5_DOORBELL);
                spin_unlock_irqrestore(&h->lock, flags);
@@ -6277,6 +6535,9 @@ static int hpsa_pci_init(struct ctlr_info *h)
        h->product_name = products[prod_index].product_name;
        h->access = *(products[prod_index].access);
 
+       h->needs_abort_tags_swizzled =
+               ctlr_needs_abort_tags_swizzled(h->board_id);
+
        pci_disable_link_state(h->pdev, PCIE_LINK_STATE_L0S |
                               PCIE_LINK_STATE_L1 | PCIE_LINK_STATE_CLKPM);
 
@@ -6622,17 +6883,21 @@ static void fail_all_outstanding_cmds(struct ctlr_info *h)
 {
        int i, refcount;
        struct CommandList *c;
+       int failcount = 0;
 
        flush_workqueue(h->resubmit_wq); /* ensure all cmds are fully built */
        for (i = 0; i < h->nr_cmds; i++) {
                c = h->cmd_pool + i;
                refcount = atomic_inc_return(&c->refcount);
                if (refcount > 1) {
-                       c->err_info->CommandStatus = CMD_HARDWARE_ERR;
+                       c->err_info->CommandStatus = CMD_CTLR_LOCKUP;
                        finish_cmd(c);
+                       failcount++;
                }
                cmd_free(h, c);
        }
+       dev_warn(&h->pdev->dev,
+               "failed %d commands in fail_all\n", failcount);
 }
 
 static void set_lockup_detected_for_all_cpus(struct ctlr_info *h, u32 value)
@@ -6658,18 +6923,19 @@ static void controller_lockup_detected(struct ctlr_info *h)
        if (!lockup_detected) {
                /* no heartbeat, but controller gave us a zero. */
                dev_warn(&h->pdev->dev,
-                       "lockup detected but scratchpad register is zero\n");
+                       "lockup detected after %d but scratchpad register is zero\n",
+                       h->heartbeat_sample_interval / HZ);
                lockup_detected = 0xffffffff;
        }
        set_lockup_detected_for_all_cpus(h, lockup_detected);
        spin_unlock_irqrestore(&h->lock, flags);
-       dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n",
-                       lockup_detected);
+       dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x after %d\n",
+                       lockup_detected, h->heartbeat_sample_interval / HZ);
        pci_disable_device(h->pdev);
        fail_all_outstanding_cmds(h);
 }
 
-static void detect_controller_lockup(struct ctlr_info *h)
+static int detect_controller_lockup(struct ctlr_info *h)
 {
        u64 now;
        u32 heartbeat;
@@ -6679,7 +6945,7 @@ static void detect_controller_lockup(struct ctlr_info *h)
        /* If we've received an interrupt recently, we're ok. */
        if (time_after64(h->last_intr_timestamp +
                                (h->heartbeat_sample_interval), now))
-               return;
+               return false;
 
        /*
         * If we've already checked the heartbeat recently, we're ok.
@@ -6688,7 +6954,7 @@ static void detect_controller_lockup(struct ctlr_info *h)
         */
        if (time_after64(h->last_heartbeat_timestamp +
                                (h->heartbeat_sample_interval), now))
-               return;
+               return false;
 
        /* If heartbeat has not changed since we last looked, we're not ok. */
        spin_lock_irqsave(&h->lock, flags);
@@ -6696,12 +6962,13 @@ static void detect_controller_lockup(struct ctlr_info *h)
        spin_unlock_irqrestore(&h->lock, flags);
        if (h->last_heartbeat == heartbeat) {
                controller_lockup_detected(h);
-               return;
+               return true;
        }
 
        /* We're ok. */
        h->last_heartbeat = heartbeat;
        h->last_heartbeat_timestamp = now;
+       return false;
 }
 
 static void hpsa_ack_ctlr_events(struct ctlr_info *h)
@@ -6890,6 +7157,7 @@ reinit_after_soft_reset:
        spin_lock_init(&h->offline_device_lock);
        spin_lock_init(&h->scan_lock);
        atomic_set(&h->passthru_cmds_avail, HPSA_MAX_CONCURRENT_PASSTHRUS);
+       atomic_set(&h->abort_cmds_available, HPSA_CMDS_RESERVED_FOR_ABORTS);
 
        h->rescan_ctlr_wq = hpsa_create_controller_wq(h, "rescan");
        if (!h->rescan_ctlr_wq) {
@@ -6947,6 +7215,7 @@ reinit_after_soft_reset:
        if (hpsa_allocate_sg_chain_blocks(h))
                goto clean4;
        init_waitqueue_head(&h->scan_wait_queue);
+       init_waitqueue_head(&h->abort_cmd_wait_queue);
        h->scan_finished = 1; /* no scan currently in progress */
 
        pci_set_drvdata(pdev, h);
@@ -7052,8 +7321,10 @@ static void hpsa_flush_cache(struct ctlr_info *h)
 {
        char *flush_buf;
        struct CommandList *c;
+       int rc;
 
        /* Don't bother trying to flush the cache if locked up */
+       /* FIXME not necessary if do_simple_cmd does the check */
        if (unlikely(lockup_detected(h)))
                return;
        flush_buf = kzalloc(4, GFP_KERNEL);
@@ -7069,7 +7340,10 @@ static void hpsa_flush_cache(struct ctlr_info *h)
                RAID_CTLR_LUNID, TYPE_CMD)) {
                goto out;
        }
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_TODEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                                       PCI_DMA_TODEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
        if (c->err_info->CommandStatus != 0)
 out:
                dev_warn(&h->pdev->dev,