hpsa: clean up aborts

[cascardo/linux.git] / drivers / scsi / hpsa.c
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c

index 82390ad..ae9d9e1 100644 (file)
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -253,6 +253,8 @@ static int hpsa_scsi_ioaccel_queue_command(struct ctlr_info *h,
         struct CommandList *c, u32 ioaccel_handle, u8 *cdb, int cdb_len,
         u8 *scsi3addr, struct hpsa_scsi_dev_t *phys_disk);
  static void hpsa_command_resubmit_worker(struct work_struct *work);
+static u32 lockup_detected(struct ctlr_info *h);
+static int detect_controller_lockup(struct ctlr_info *h);
  
  static inline struct ctlr_info *sdev_to_hba(struct scsi_device *sdev)
  {
@@ -426,7 +428,7 @@ static ssize_t host_show_hp_ssd_smart_path_status(struct device *dev,
  /* List of controllers which cannot be hard reset on kexec with reset_devices */
  static u32 unresettable_controller[] = {
         0x324a103C, /* Smart Array P712m */
-       0x324b103C, /* SmartArray P711m */
+       0x324b103C, /* Smart Array P711m */
         0x3223103C, /* Smart Array P800 */
         0x3234103C, /* Smart Array P400 */
         0x3235103C, /* Smart Array P400i */
@@ -468,24 +470,32 @@ static u32 soft_unresettable_controller[] = {
         0x409D0E11, /* Smart Array 6400 EM */
  };
  
-static int ctlr_is_hard_resettable(u32 board_id)
+static u32 needs_abort_tags_swizzled[] = {
+       0x323D103C, /* Smart Array P700m */
+       0x324a103C, /* Smart Array P712m */
+       0x324b103C, /* Smart Array P711m */
+};
+
+static int board_id_in_array(u32 a[], int nelems, u32 board_id)
  {
         int i;
  
-       for (i = 0; i < ARRAY_SIZE(unresettable_controller); i++)
-               if (unresettable_controller[i] == board_id)
-                       return 0;
-       return 1;
+       for (i = 0; i < nelems; i++)
+               if (a[i] == board_id)
+                       return 1;
+       return 0;
  }
  
-static int ctlr_is_soft_resettable(u32 board_id)
+static int ctlr_is_hard_resettable(u32 board_id)
  {
-       int i;
+       return !board_id_in_array(unresettable_controller,
+                       ARRAY_SIZE(unresettable_controller), board_id);
+}
  
-       for (i = 0; i < ARRAY_SIZE(soft_unresettable_controller); i++)
-               if (soft_unresettable_controller[i] == board_id)
-                       return 0;
-       return 1;
+static int ctlr_is_soft_resettable(u32 board_id)
+{
+       return !board_id_in_array(soft_unresettable_controller,
+                       ARRAY_SIZE(soft_unresettable_controller), board_id);
  }
  
  static int ctlr_is_resettable(u32 board_id)
@@ -494,6 +504,12 @@ static int ctlr_is_resettable(u32 board_id)
                 ctlr_is_soft_resettable(board_id);
  }
  
+static int ctlr_needs_abort_tags_swizzled(u32 board_id)
+{
+       return board_id_in_array(needs_abort_tags_swizzled,
+                       ARRAY_SIZE(needs_abort_tags_swizzled), board_id);
+}
+
  static ssize_t host_show_resettable(struct device *dev,
         struct device_attribute *attr, char *buf)
  {
@@ -748,30 +764,43 @@ static inline u32 next_command(struct ctlr_info *h, u8 q)
   * a separate special register for submitting commands.
   */
  
-/* set_performant_mode: Modify the tag for cciss performant
+/*
+ * set_performant_mode: Modify the tag for cciss performant
   * set bit 0 for pull model, bits 3-1 for block fetch
   * register number
   */
-static void set_performant_mode(struct ctlr_info *h, struct CommandList *c)
+#define DEFAULT_REPLY_QUEUE (-1)
+static void set_performant_mode(struct ctlr_info *h, struct CommandList *c,
+                                       int reply_queue)
  {
         if (likely(h->transMethod & CFGTBL_Trans_Performant)) {
                 c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
-               if (likely(h->msix_vector > 0))
+               if (unlikely(!h->msix_vector))
+                       return;
+               if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
                         c->Header.ReplyQueue =
                                 raw_smp_processor_id() % h->nreply_queues;
+               else
+                       c->Header.ReplyQueue = reply_queue % h->nreply_queues;
         }
  }
  
  static void set_ioaccel1_performant_mode(struct ctlr_info *h,
-                                               struct CommandList *c)
+                                               struct CommandList *c,
+                                               int reply_queue)
  {
         struct io_accel1_cmd *cp = &h->ioaccel_cmd_pool[c->cmdindex];
  
-       /* Tell the controller to post the reply to the queue for this
+       /*
+        * Tell the controller to post the reply to the queue for this
          * processor.  This seems to give the best I/O throughput.
          */
-       cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
-       /* Set the bits in the address sent down to include:
+       if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+               cp->ReplyQueue = smp_processor_id() % h->nreply_queues;
+       else
+               cp->ReplyQueue = reply_queue % h->nreply_queues;
+       /*
+        * Set the bits in the address sent down to include:
          *  - performant mode bit (bit 0)
          *  - pull count (bits 1-3)
          *  - command type (bits 4-6)
@@ -781,15 +810,21 @@ static void set_ioaccel1_performant_mode(struct ctlr_info *h,
  }
  
  static void set_ioaccel2_performant_mode(struct ctlr_info *h,
-                                               struct CommandList *c)
+                                               struct CommandList *c,
+                                               int reply_queue)
  {
         struct io_accel2_cmd *cp = &h->ioaccel2_cmd_pool[c->cmdindex];
  
-       /* Tell the controller to post the reply to the queue for this
+       /*
+        * Tell the controller to post the reply to the queue for this
          * processor.  This seems to give the best I/O throughput.
          */
-       cp->reply_queue = smp_processor_id() % h->nreply_queues;
-       /* Set the bits in the address sent down to include:
+       if (likely(reply_queue == DEFAULT_REPLY_QUEUE))
+               cp->reply_queue = smp_processor_id() % h->nreply_queues;
+       else
+               cp->reply_queue = reply_queue % h->nreply_queues;
+       /*
+        * Set the bits in the address sent down to include:
          *  - performant mode bit not used in ioaccel mode 2
          *  - pull count (bits 0-3)
          *  - command type isn't needed for ioaccel2
@@ -826,26 +861,32 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h,
                 h->heartbeat_sample_interval = HEARTBEAT_SAMPLE_INTERVAL;
  }
  
-static void enqueue_cmd_and_start_io(struct ctlr_info *h,
-       struct CommandList *c)
+static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
+       struct CommandList *c, int reply_queue)
  {
         dial_down_lockup_detection_during_fw_flash(h, c);
         atomic_inc(&h->commands_outstanding);
         switch (c->cmd_type) {
         case CMD_IOACCEL1:
-               set_ioaccel1_performant_mode(h, c);
+               set_ioaccel1_performant_mode(h, c, reply_queue);
                 writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
                 break;
         case CMD_IOACCEL2:
-               set_ioaccel2_performant_mode(h, c);
+               set_ioaccel2_performant_mode(h, c, reply_queue);
                 writel(c->busaddr, h->vaddr + IOACCEL2_INBOUND_POSTQ_32);
                 break;
         default:
-               set_performant_mode(h, c);
+               set_performant_mode(h, c, reply_queue);
                 h->access.submit_command(h, c);
         }
  }
  
+static void enqueue_cmd_and_start_io(struct ctlr_info *h,
+                                       struct CommandList *c)
+{
+       __enqueue_cmd_and_start_io(h, c, DEFAULT_REPLY_QUEUE);
+}
+
  static inline int is_hba_lunid(unsigned char scsi3addr[])
  {
         return memcmp(scsi3addr, RAID_CTLR_LUNID, 8) == 0;
@@ -886,6 +927,23 @@ static int hpsa_find_target_lun(struct ctlr_info *h,
         return !found;
  }
  
+static inline void hpsa_show_dev_msg(const char *level, struct ctlr_info *h,
+       struct hpsa_scsi_dev_t *dev, char *description)
+{
+       dev_printk(level, &h->pdev->dev,
+                       "scsi %d:%d:%d:%d: %s %s %.8s %.16s RAID-%s SSDSmartPathCap%c En%c Exp=%d\n",
+                       h->scsi_host->host_no, dev->bus, dev->target, dev->lun,
+                       description,
+                       scsi_device_type(dev->devtype),
+                       dev->vendor,
+                       dev->model,
+                       dev->raid_level > RAID_UNKNOWN ?
+                               "RAID-?" : raid_label[dev->raid_level],
+                       dev->offload_config ? '+' : '-',
+                       dev->offload_enabled ? '+' : '-',
+                       dev->expose_state);
+}
+
  /* Add an entry into h->dev[] array. */
  static int hpsa_scsi_add_entry(struct ctlr_info *h, int hostno,
                 struct hpsa_scsi_dev_t *device,
@@ -955,15 +1013,8 @@ lun_assigned:
         device->offload_enabled = 0;
         added[*nadded] = device;
         (*nadded)++;
-
-       /* initially, (before registering with scsi layer) we don't
-        * know our hostno and we don't want to print anything first
-        * time anyway (the scsi layer's inquiries will show that info)
-        */
-       /* if (hostno != -1) */
-               dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
-                       scsi_device_type(device->devtype), hostno,
-                       device->bus, device->target, device->lun);
+       hpsa_show_dev_msg(KERN_INFO, h, device,
+               device->expose_state & HPSA_SCSI_ADD ? "added" : "masked");
         return 0;
  }
  
@@ -1003,6 +1054,7 @@ static void hpsa_scsi_update_entry(struct ctlr_info *h, int hostno,
         if (!new_entry->offload_enabled)
                 h->dev[entry]->offload_enabled = 0;
  
+       hpsa_show_dev_msg(KERN_INFO, h, h->dev[entry], "updated");
  }
  
  /* Replace an entry from h->dev[] array. */
@@ -1030,9 +1082,7 @@ static void hpsa_scsi_replace_entry(struct ctlr_info *h, int hostno,
         h->dev[entry] = new_entry;
         added[*nadded] = new_entry;
         (*nadded)++;
-       dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d changed.\n",
-               scsi_device_type(new_entry->devtype), hostno, new_entry->bus,
-                       new_entry->target, new_entry->lun);
+       hpsa_show_dev_msg(KERN_INFO, h, new_entry, "replaced");
  }
  
  /* Remove an entry from h->dev[] array. */
@@ -1052,9 +1102,7 @@ static void hpsa_scsi_remove_entry(struct ctlr_info *h, int hostno, int entry,
         for (i = entry; i < h->ndevices-1; i++)
                 h->dev[i] = h->dev[i+1];
         h->ndevices--;
-       dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d removed.\n",
-               scsi_device_type(sd->devtype), hostno, sd->bus, sd->target,
-               sd->lun);
+       hpsa_show_dev_msg(KERN_INFO, h, sd, "removed");
  }
  
  #define SCSI3ADDR_EQ(a, b) ( \
@@ -1435,9 +1483,7 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
                  */
                 if (sd[i]->volume_offline) {
                         hpsa_show_volume_status(h, sd[i]);
-                       dev_info(&h->pdev->dev, "c%db%dt%dl%d: temporarily offline\n",
-                               h->scsi_host->host_no,
-                               sd[i]->bus, sd[i]->target, sd[i]->lun);
+                       hpsa_show_dev_msg(KERN_INFO, h, sd[i], "offline");
                         continue;
                 }
  
@@ -1501,10 +1547,8 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
                                  * future cmds to this device will get selection
                                  * timeout as if the device was gone.
                                  */
-                               dev_warn(&h->pdev->dev,
-                                       "didn't find c%db%dt%dl%d for removal.\n",
-                                       hostno, removed[i]->bus,
-                                       removed[i]->target, removed[i]->lun);
+                               hpsa_show_dev_msg(KERN_WARNING, h, removed[i],
+                                       "didn't find device for removal.");
                         }
                 }
                 kfree(removed[i]);
@@ -1518,9 +1562,8 @@ static void adjust_hpsa_scsi_table(struct ctlr_info *h, int hostno,
                 if (scsi_add_device(sh, added[i]->bus,
                         added[i]->target, added[i]->lun) == 0)
                         continue;
-               dev_warn(&h->pdev->dev, "scsi_add_device c%db%dt%dl%d failed, "
-                       "device not added.\n", hostno, added[i]->bus,
-                       added[i]->target, added[i]->lun);
+               hpsa_show_dev_msg(KERN_WARNING, h, added[i],
+                                       "addition failed, device not added.");
                 /* now we have to remove it from h->dev,
                  * since it didn't get added to scsi mid layer
                  */
@@ -1840,6 +1883,19 @@ static void complete_scsi_command(struct CommandList *cp)
         if (cp->cmd_type == CMD_IOACCEL2 || cp->cmd_type == CMD_IOACCEL1)
                 atomic_dec(&cp->phys_disk->ioaccel_cmds_out);
  
+       /*
+        * We check for lockup status here as it may be set for
+        * CMD_SCSI, CMD_IOACCEL1 and CMD_IOACCEL2 commands by
+        * fail_all_outstanding_cmds()
+        */
+       if (unlikely(ei->CommandStatus == CMD_CTLR_LOCKUP)) {
+               /* DID_NO_CONNECT will prevent a retry */
+               cmd->result = DID_NO_CONNECT << 16;
+               cmd_free(h, cp);
+               cmd->scsi_done(cmd);
+               return;
+       }
+
         if (cp->cmd_type == CMD_IOACCEL2)
                 return process_ioaccel2_completion(h, cp, cmd, dev);
  
@@ -2054,14 +2110,36 @@ static int hpsa_map_one(struct pci_dev *pdev,
         return 0;
  }
  
-static inline void hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h,
-       struct CommandList *c)
+#define NO_TIMEOUT ((unsigned long) -1)
+#define DEFAULT_TIMEOUT 30000 /* milliseconds */
+static int hpsa_scsi_do_simple_cmd_core(struct ctlr_info *h,
+       struct CommandList *c, int reply_queue, unsigned long timeout_msecs)
  {
         DECLARE_COMPLETION_ONSTACK(wait);
  
         c->waiting = &wait;
-       enqueue_cmd_and_start_io(h, c);
-       wait_for_completion(&wait);
+       __enqueue_cmd_and_start_io(h, c, reply_queue);
+       if (timeout_msecs == NO_TIMEOUT) {
+               /* TODO: get rid of this no-timeout thing */
+               wait_for_completion_io(&wait);
+               return IO_OK;
+       }
+       if (!wait_for_completion_io_timeout(&wait,
+                                       msecs_to_jiffies(timeout_msecs))) {
+               dev_warn(&h->pdev->dev, "Command timed out.\n");
+               return -ETIMEDOUT;
+       }
+       return IO_OK;
+}
+
+static int hpsa_scsi_do_simple_cmd(struct ctlr_info *h, struct CommandList *c,
+                                  int reply_queue, unsigned long timeout_msecs)
+{
+       if (unlikely(lockup_detected(h))) {
+               c->err_info->CommandStatus = CMD_CTLR_LOCKUP;
+               return IO_OK;
+       }
+       return hpsa_scsi_do_simple_cmd_core(h, c, reply_queue, timeout_msecs);
  }
  
  static u32 lockup_detected(struct ctlr_info *h)
@@ -2076,25 +2154,19 @@ static u32 lockup_detected(struct ctlr_info *h)
         return rc;
  }
  
-static void hpsa_scsi_do_simple_cmd_core_if_no_lockup(struct ctlr_info *h,
-       struct CommandList *c)
-{
-       /* If controller lockup detected, fake a hardware error. */
-       if (unlikely(lockup_detected(h)))
-               c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-       else
-               hpsa_scsi_do_simple_cmd_core(h, c);
-}
-
  #define MAX_DRIVER_CMD_RETRIES 25
-static void hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
-       struct CommandList *c, int data_direction)
+static int hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
+       struct CommandList *c, int data_direction, unsigned long timeout_msecs)
  {
         int backoff_time = 10, retry_count = 0;
+       int rc;
  
         do {
                 memset(c->err_info, 0, sizeof(*c->err_info));
-               hpsa_scsi_do_simple_cmd_core(h, c);
+               rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE,
+                                                 timeout_msecs);
+               if (rc)
+                       break;
                 retry_count++;
                 if (retry_count > 3) {
                         msleep(backoff_time);
@@ -2105,6 +2177,9 @@ static void hpsa_scsi_do_simple_cmd_with_retry(struct ctlr_info *h,
                         check_for_busy(h, c)) &&
                         retry_count <= MAX_DRIVER_CMD_RETRIES);
         hpsa_pci_unmap(h->pdev, c, 1, data_direction);
+       if (retry_count > MAX_DRIVER_CMD_RETRIES)
+               rc = -EIO;
+       return rc;
  }
  
  static void hpsa_print_cmd(struct ctlr_info *h, char *txt,
@@ -2181,6 +2256,9 @@ static void hpsa_scsi_interpret_error(struct ctlr_info *h,
         case CMD_UNABORTABLE:
                 hpsa_print_cmd(h, "unabortable", cp);
                 break;
+       case CMD_CTLR_LOCKUP:
+               hpsa_print_cmd(h, "controller lockup detected", cp);
+               break;
         default:
                 hpsa_print_cmd(h, "unknown status", cp);
                 dev_warn(d, "Unknown command status %x\n",
@@ -2208,7 +2286,10 @@ static int hpsa_scsi_do_inquiry(struct ctlr_info *h, unsigned char *scsi3addr,
                 rc = -1;
                 goto out;
         }
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                                       PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
         ei = c->err_info;
         if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                 hpsa_scsi_interpret_error(h, c);
@@ -2238,7 +2319,10 @@ static int hpsa_bmic_ctrl_mode_sense(struct ctlr_info *h,
                 rc = -1;
                 goto out;
         }
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                       PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
         ei = c->err_info;
         if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                 hpsa_scsi_interpret_error(h, c);
@@ -2250,7 +2334,7 @@ out:
         }
  
  static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
-       u8 reset_type)
+       u8 reset_type, int reply_queue)
  {
         int rc = IO_OK;
         struct CommandList *c;
@@ -2267,7 +2351,11 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
         (void) fill_cmd(c, HPSA_DEVICE_RESET_MSG, h, NULL, 0, 0,
                         scsi3addr, TYPE_MSG);
         c->Request.CDB[1] = reset_type; /* fill_cmd defaults to LUN reset */
-       hpsa_scsi_do_simple_cmd_core(h, c);
+       rc = hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
+       if (rc) {
+               dev_warn(&h->pdev->dev, "Failed to send reset command\n");
+               goto out;
+       }
         /* no unmap needed here because no data xfer. */
  
         ei = c->err_info;
@@ -2275,6 +2363,7 @@ static int hpsa_send_reset(struct ctlr_info *h, unsigned char *scsi3addr,
                 hpsa_scsi_interpret_error(h, c);
                 rc = -1;
         }
+out:
         cmd_free(h, c);
         return rc;
  }
@@ -2392,15 +2481,18 @@ static int hpsa_get_raid_map(struct ctlr_info *h,
                         sizeof(this_device->raid_map), 0,
                         scsi3addr, TYPE_CMD)) {
                 dev_warn(&h->pdev->dev, "Out of memory in hpsa_get_raid_map()\n");
-               cmd_free(h, c);
-               return -ENOMEM;
+               rc = -ENOMEM;
+               goto out;
         }
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                                       PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
         ei = c->err_info;
         if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                 hpsa_scsi_interpret_error(h, c);
-               cmd_free(h, c);
-               return -1;
+               rc = -1;
+               goto out;
         }
         cmd_free(h, c);
  
@@ -2412,6 +2504,9 @@ static int hpsa_get_raid_map(struct ctlr_info *h,
         }
         hpsa_debug_map_buff(h, rc, &this_device->raid_map);
         return rc;
+out:
+       cmd_free(h, c);
+       return rc;
  }
  
  static int hpsa_bmic_id_physical_device(struct ctlr_info *h,
@@ -2431,7 +2526,8 @@ static int hpsa_bmic_id_physical_device(struct ctlr_info *h,
         c->Request.CDB[2] = bmic_device_index & 0xff;
         c->Request.CDB[9] = (bmic_device_index >> 8) & 0xff;
  
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE,
+                                               NO_TIMEOUT);
         ei = c->err_info;
         if (ei->CommandStatus != 0 && ei->CommandStatus != CMD_DATA_UNDERRUN) {
                 hpsa_scsi_interpret_error(h, c);
@@ -2566,7 +2662,10 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, int logical,
         }
         if (extended_response)
                 c->Request.CDB[1] = extended_response;
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_FROMDEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                                       PCI_DMA_FROMDEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
         ei = c->err_info;
         if (ei->CommandStatus != 0 &&
             ei->CommandStatus != CMD_DATA_UNDERRUN) {
@@ -2659,7 +2758,7 @@ static int hpsa_volume_offline(struct ctlr_info *h,
  {
         struct CommandList *c;
         unsigned char *sense, sense_key, asc, ascq;
-       int ldstat = 0;
+       int rc, ldstat = 0;
         u16 cmd_status;
         u8 scsi_status;
  #define ASC_LUN_NOT_READY 0x04
@@ -2670,7 +2769,11 @@ static int hpsa_volume_offline(struct ctlr_info *h,
         if (!c)
                 return 0;
         (void) fill_cmd(c, TEST_UNIT_READY, h, NULL, 0, 0, scsi3addr, TYPE_CMD);
-       hpsa_scsi_do_simple_cmd_core(h, c);
+       rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
+       if (rc) {
+               cmd_free(h, c);
+               return 0;
+       }
         sense = c->err_info->SenseInfo;
         sense_key = sense[2];
         asc = sense[12];
@@ -2714,6 +2817,50 @@ static int hpsa_volume_offline(struct ctlr_info *h,
         return 0;
  }
  
+/*
+ * Find out if a logical device supports aborts by simply trying one.
+ * Smart Array may claim not to support aborts on logical drives, but
+ * if an MSA2000 is connected, the drives on that will be presented
+ * by the Smart Array as logical drives, and aborts may be sent to
+ * those devices successfully.  So the simplest way to find out is
+ * to simply try an abort and see how the device responds.
+ */
+static int hpsa_device_supports_aborts(struct ctlr_info *h,
+                                       unsigned char *scsi3addr)
+{
+       struct CommandList *c;
+       struct ErrorInfo *ei;
+       int rc = 0;
+
+       u64 tag = (u64) -1; /* bogus tag */
+
+       /* Assume that physical devices support aborts */
+       if (!is_logical_dev_addr_mode(scsi3addr))
+               return 1;
+
+       c = cmd_alloc(h);
+       if (!c)
+               return -ENOMEM;
+       (void) fill_cmd(c, HPSA_ABORT_MSG, h, &tag, 0, 0, scsi3addr, TYPE_MSG);
+       (void) hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
+       /* no unmap needed here because no data xfer. */
+       ei = c->err_info;
+       switch (ei->CommandStatus) {
+       case CMD_INVALID:
+               rc = 0;
+               break;
+       case CMD_UNABORTABLE:
+       case CMD_ABORT_FAILED:
+               rc = 1;
+               break;
+       default:
+               rc = 0;
+               break;
+       }
+       cmd_free(h, c);
+       return rc;
+}
+
  static int hpsa_update_device_info(struct ctlr_info *h,
         unsigned char scsi3addr[], struct hpsa_scsi_dev_t *this_device,
         unsigned char *is_OBDR_device)
@@ -2780,7 +2927,6 @@ static int hpsa_update_device_info(struct ctlr_info *h,
                                         strncmp(obdr_sig, OBDR_TAPE_SIG,
                                                 OBDR_SIG_LEN) == 0);
         }
-
         kfree(inq_buff);
         return 0;
  
@@ -2789,6 +2935,31 @@ bail_out:
         return 1;
  }
  
+static void hpsa_update_device_supports_aborts(struct ctlr_info *h,
+                       struct hpsa_scsi_dev_t *dev, u8 *scsi3addr)
+{
+       unsigned long flags;
+       int rc, entry;
+       /*
+        * See if this device supports aborts.  If we already know
+        * the device, we already know if it supports aborts, otherwise
+        * we have to find out if it supports aborts by trying one.
+        */
+       spin_lock_irqsave(&h->devlock, flags);
+       rc = hpsa_scsi_find_entry(dev, h->dev, h->ndevices, &entry);
+       if ((rc == DEVICE_SAME || rc == DEVICE_UPDATED) &&
+               entry >= 0 && entry < h->ndevices) {
+               dev->supports_aborts = h->dev[entry]->supports_aborts;
+               spin_unlock_irqrestore(&h->devlock, flags);
+       } else {
+               spin_unlock_irqrestore(&h->devlock, flags);
+               dev->supports_aborts =
+                               hpsa_device_supports_aborts(h, scsi3addr);
+               if (dev->supports_aborts < 0)
+                       dev->supports_aborts = 0;
+       }
+}
+
  static unsigned char *ext_target_model[] = {
         "MSA2012",
         "MSA2024",
@@ -2894,6 +3065,7 @@ static int add_ext_target_dev(struct ctlr_info *h,
         (*n_ext_target_devs)++;
         hpsa_set_bus_target_lun(this_device,
                                 tmpdevice->bus, tmpdevice->target, 0);
+       hpsa_update_device_supports_aborts(h, this_device, scsi3addr);
         set_bit(tmpdevice->target, lunzerobits);
         return 1;
  }
@@ -3148,6 +3320,7 @@ static void hpsa_update_scsi_devices(struct ctlr_info *h, int hostno)
                                                         &is_OBDR))
                         continue; /* skip it if we can't talk to it. */
                 figure_bus_target_lun(h, lunaddrbytes, tmpdevice);
+               hpsa_update_device_supports_aborts(h, tmpdevice, lunaddrbytes);
                 this_device = currentsd[ncurrent];
  
                 /*
@@ -4003,7 +4176,11 @@ static int hpsa_scsi_ioaccel_raid_map(struct ctlr_info *h,
                                                 dev->phys_disk[map_index]);
  }
  
-/* Submit commands down the "normal" RAID stack path */
+/*
+ * Submit commands down the "normal" RAID stack path
+ * All callers of hpsa_ciss_submit must check lockup_detected
+ * beforehand: optionally before, and always after, calling cmd_alloc.
+ */
  static int hpsa_ciss_submit(struct ctlr_info *h,
         struct CommandList *c, struct scsi_cmnd *cmd,
         unsigned char scsi3addr[])
@@ -4114,7 +4291,7 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
         memcpy(scsi3addr, dev->scsi3addr, sizeof(scsi3addr));
  
         if (unlikely(lockup_detected(h))) {
-               cmd->result = DID_ERROR << 16;
+               cmd->result = DID_NO_CONNECT << 16;
                 cmd->scsi_done(cmd);
                 return 0;
         }
@@ -4124,7 +4301,7 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
                 return SCSI_MLQUEUE_HOST_BUSY;
         }
         if (unlikely(lockup_detected(h))) {
-               cmd->result = DID_ERROR << 16;
+               cmd->result = DID_NO_CONNECT << 16;
                 cmd_free(h, c);
                 cmd->scsi_done(cmd);
                 return 0;
@@ -4319,7 +4496,10 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h,
                 /* Send the Test Unit Ready, fill_cmd can't fail, no mapping */
                 (void) fill_cmd(c, TEST_UNIT_READY, h,
                                 NULL, 0, 0, lunaddr, TYPE_CMD);
-               hpsa_scsi_do_simple_cmd_core(h, c);
+               rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE,
+                                               NO_TIMEOUT);
+               if (rc)
+                       goto do_it_again;
                 /* no unmap needed here because no data xfer. */
  
                 if (c->err_info->CommandStatus == CMD_SUCCESS)
@@ -4330,7 +4510,7 @@ static int wait_for_device_to_become_ready(struct ctlr_info *h,
                         (c->err_info->SenseInfo[2] == NO_SENSE ||
                         c->err_info->SenseInfo[2] == UNIT_ATTENTION))
                         break;
-
+do_it_again:
                 dev_warn(&h->pdev->dev, "waiting %d secs "
                         "for device to become ready.\n", waittime);
                 rc = 1; /* device not ready. */
@@ -4368,14 +4548,36 @@ static int hpsa_eh_device_reset_handler(struct scsi_cmnd *scsicmd)
                         "device lookup failed.\n");
                 return FAILED;
         }
-       dev_warn(&h->pdev->dev, "resetting device %d:%d:%d:%d\n",
-               h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
+
+       /* if controller locked up, we can guarantee command won't complete */
+       if (lockup_detected(h)) {
+               dev_warn(&h->pdev->dev,
+                       "scsi %d:%d:%d:%d RESET FAILED, lockup detected\n",
+                       h->scsi_host->host_no, dev->bus, dev->target,
+                       dev->lun);
+               return FAILED;
+       }
+
+       /* this reset request might be the result of a lockup; check */
+       if (detect_controller_lockup(h)) {
+               dev_warn(&h->pdev->dev,
+                        "scsi %d:%d:%d:%d RESET FAILED, new lockup detected\n",
+                        h->scsi_host->host_no, dev->bus, dev->target,
+                        dev->lun);
+               return FAILED;
+       }
+
+       hpsa_show_dev_msg(KERN_WARNING, h, dev, "resetting");
+
         /* send a reset to the SCSI LUN which the command was sent to */
-       rc = hpsa_send_reset(h, dev->scsi3addr, HPSA_RESET_TYPE_LUN);
+       rc = hpsa_send_reset(h, dev->scsi3addr, HPSA_RESET_TYPE_LUN,
+                            DEFAULT_REPLY_QUEUE);
         if (rc == 0 && wait_for_device_to_become_ready(h, dev->scsi3addr) == 0)
                 return SUCCESS;
  
-       dev_warn(&h->pdev->dev, "resetting device failed.\n");
+       dev_warn(&h->pdev->dev,
+               "scsi %d:%d:%d:%d reset failed\n",
+               h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
         return FAILED;
  }
  
@@ -4420,7 +4622,7 @@ static void hpsa_get_tag(struct ctlr_info *h,
  }
  
  static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
-       struct CommandList *abort, int swizzle)
+       struct CommandList *abort, int reply_queue)
  {
         int rc = IO_OK;
         struct CommandList *c;
@@ -4434,13 +4636,13 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
         }
  
         /* fill_cmd can't fail here, no buffer to map */
-       (void) fill_cmd(c, HPSA_ABORT_MSG, h, abort,
+       (void) fill_cmd(c, HPSA_ABORT_MSG, h, &abort->Header.tag,
                 0, 0, scsi3addr, TYPE_MSG);
-       if (swizzle)
+       if (h->needs_abort_tags_swizzled)
                 swizzle_abort_tag(&c->Request.CDB[4]);
-       hpsa_scsi_do_simple_cmd_core(h, c);
+       (void) hpsa_scsi_do_simple_cmd(h, c, reply_queue, NO_TIMEOUT);
         hpsa_get_tag(h, abort, &taglower, &tagupper);
-       dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: do_simple_cmd_core completed.\n",
+       dev_dbg(&h->pdev->dev, "%s: Tag:0x%08x:%08x: do_simple_cmd(abort) completed.\n",
                 __func__, tagupper, taglower);
         /* no unmap needed here because no data xfer. */
  
@@ -4472,7 +4674,7 @@ static int hpsa_send_abort(struct ctlr_info *h, unsigned char *scsi3addr,
   */
  
  static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
-       unsigned char *scsi3addr, struct CommandList *abort)
+       unsigned char *scsi3addr, struct CommandList *abort, int reply_queue)
  {
         int rc = IO_OK;
         struct scsi_cmnd *scmd; /* scsi command within request being aborted */
@@ -4491,8 +4693,9 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
  
         if (h->raid_offload_debug > 0)
                 dev_info(&h->pdev->dev,
-                       "Reset as abort: Abort requested on C%d:B%d:T%d:L%d scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+                       "scsi %d:%d:%d:%d %s scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                         h->scsi_host->host_no, dev->bus, dev->target, dev->lun,
+                       "Reset as abort",
                         scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
                         scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]);
  
@@ -4514,7 +4717,7 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
                         "Reset as abort: Resetting physical device at scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
                         psa[0], psa[1], psa[2], psa[3],
                         psa[4], psa[5], psa[6], psa[7]);
-       rc = hpsa_send_reset(h, psa, HPSA_RESET_TYPE_TARGET);
+       rc = hpsa_send_reset(h, psa, HPSA_RESET_TYPE_TARGET, reply_queue);
         if (rc != 0) {
                 dev_warn(&h->pdev->dev,
                         "Reset as abort: Failed on physical device at scsi3addr 0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
@@ -4541,14 +4744,8 @@ static int hpsa_send_reset_as_abort_ioaccel2(struct ctlr_info *h,
         return rc; /* success */
  }
  
-/* Some Smart Arrays need the abort tag swizzled, and some don't.  It's hard to
- * tell which kind we're dealing with, so we send the abort both ways.  There
- * shouldn't be any collisions between swizzled and unswizzled tags due to the
- * way we construct our tags but we check anyway in case the assumptions which
- * make this true someday become false.
- */
  static int hpsa_send_abort_both_ways(struct ctlr_info *h,
-       unsigned char *scsi3addr, struct CommandList *abort)
+       unsigned char *scsi3addr, struct CommandList *abort, int reply_queue)
  {
         /* ioccelerator mode 2 commands should be aborted via the
          * accelerated path, since RAID path is unaware of these commands,
@@ -4556,10 +4753,30 @@ static int hpsa_send_abort_both_ways(struct ctlr_info *h,
          * Change abort to physical device reset.
          */
         if (abort->cmd_type == CMD_IOACCEL2)
-               return hpsa_send_reset_as_abort_ioaccel2(h, scsi3addr, abort);
+               return hpsa_send_reset_as_abort_ioaccel2(h, scsi3addr,
+                                                       abort, reply_queue);
+       return hpsa_send_abort(h, scsi3addr, abort, reply_queue);
+}
  
-       return hpsa_send_abort(h, scsi3addr, abort, 0) &&
-                       hpsa_send_abort(h, scsi3addr, abort, 1);
+/* Reply queue for c: from its ioaccel2 pool entry, else from its header */
+static int hpsa_extract_reply_queue(struct ctlr_info *h,
+                                       struct CommandList *c)
+{
+       if (c->cmd_type != CMD_IOACCEL2)
+               return c->Header.ReplyQueue;
+       return h->ioaccel2_cmd_pool[c->cmdindex].reply_queue;
+}
+
+/*
+ * Limit concurrency of abort commands to prevent over-subscription of
+ * commands.  Returns nonzero if the wait timed out, 0 once a slot is taken.
+ */
+static inline int wait_for_available_abort_cmd(struct ctlr_info *h)
+{
+#define ABORT_CMD_WAIT_MSECS 5000
+       return !wait_event_timeout(h->abort_cmd_wait_queue,
+                       atomic_dec_if_positive(&h->abort_cmds_available) >= 0,
+                       msecs_to_jiffies(ABORT_CMD_WAIT_MSECS));
 }
  
  /* Send an abort for the specified command.
@@ -4577,27 +4794,19 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
         char msg[256];          /* For debug messaging. */
         int ml = 0;
         __le32 tagupper, taglower;
-       int refcount;
+       int refcount, reply_queue;
  
-       /* Find the controller of the command to be aborted */
-       h = sdev_to_hba(sc->device);
-       if (WARN(h == NULL,
-                       "ABORT REQUEST FAILED, Controller lookup failed.\n"))
+       if (sc == NULL)
                 return FAILED;
  
-       if (lockup_detected(h))
+       if (sc->device == NULL)
                 return FAILED;
  
-       /* Check that controller supports some kind of task abort */
-       if (!(HPSATMF_PHYS_TASK_ABORT & h->TMFSupportFlags) &&
-               !(HPSATMF_LOG_TASK_ABORT & h->TMFSupportFlags))
+       /* Find the controller of the command to be aborted */
+       h = sdev_to_hba(sc->device);
+       if (h == NULL)
                 return FAILED;
  
-       memset(msg, 0, sizeof(msg));
-       ml += sprintf(msg+ml, "ABORT REQUEST on C%d:B%d:T%d:L%llu ",
-               h->scsi_host->host_no, sc->device->channel,
-               sc->device->id, sc->device->lun);
-
         /* Find the device of the command to be aborted */
         dev = sc->device->hostdata;
         if (!dev) {
@@ -4606,6 +4815,31 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
                 return FAILED;
         }
  
+       /* If controller locked up, we can guarantee command won't complete */
+       if (lockup_detected(h)) {
+               hpsa_show_dev_msg(KERN_WARNING, h, dev,
+                                       "ABORT FAILED, lockup detected");
+               return FAILED;
+       }
+
+       /* This is a good time to check if controller lockup has occurred */
+       if (detect_controller_lockup(h)) {
+               hpsa_show_dev_msg(KERN_WARNING, h, dev,
+                                       "ABORT FAILED, new lockup detected");
+               return FAILED;
+       }
+
+       /* Check that controller supports some kind of task abort */
+       if (!(HPSATMF_PHYS_TASK_ABORT & h->TMFSupportFlags) &&
+               !(HPSATMF_LOG_TASK_ABORT & h->TMFSupportFlags))
+               return FAILED;
+
+       memset(msg, 0, sizeof(msg));
+       ml += sprintf(msg+ml, "scsi %d:%d:%d:%llu %s",
+               h->scsi_host->host_no, sc->device->channel,
+               sc->device->id, sc->device->lun,
+               "Aborting command");
+
         /* Get SCSI command to be aborted */
         abort = (struct CommandList *) sc->host_scribble;
         if (abort == NULL) {
@@ -4617,26 +4851,40 @@ static int hpsa_eh_abort_handler(struct scsi_cmnd *sc)
                 cmd_free(h, abort);
                 return SUCCESS;
         }
+
+       /* Don't bother trying the abort if we know it won't work. */
+       if (abort->cmd_type != CMD_IOACCEL2 &&
+               abort->cmd_type != CMD_IOACCEL1 && !dev->supports_aborts) {
+               cmd_free(h, abort);
+               return FAILED;
+       }
+
         hpsa_get_tag(h, abort, &taglower, &tagupper);
+       reply_queue = hpsa_extract_reply_queue(h, abort);
         ml += sprintf(msg+ml, "Tag:0x%08x:%08x ", tagupper, taglower);
         as  = abort->scsi_cmd;
         if (as != NULL)
                 ml += sprintf(msg+ml, "Command:0x%x SN:0x%lx ",
                         as->cmnd[0], as->serial_number);
         dev_dbg(&h->pdev->dev, "%s\n", msg);
-       dev_warn(&h->pdev->dev, "Abort request on C%d:B%d:T%d:L%d\n",
-               h->scsi_host->host_no, dev->bus, dev->target, dev->lun);
+       hpsa_show_dev_msg(KERN_WARNING, h, dev, "Aborting command");
         /*
          * Command is in flight, or possibly already completed
          * by the firmware (but not to the scsi mid layer) but we can't
          * distinguish which.  Send the abort down.
          */
-       rc = hpsa_send_abort_both_ways(h, dev->scsi3addr, abort);
+       if (wait_for_available_abort_cmd(h)) {
+               dev_warn(&h->pdev->dev,
+                       "Timed out waiting for an abort command to become available.\n");
+               cmd_free(h, abort);
+               return FAILED;
+       }
+       rc = hpsa_send_abort_both_ways(h, dev->scsi3addr, abort, reply_queue);
+       atomic_inc(&h->abort_cmds_available);
+       wake_up_all(&h->abort_cmd_wait_queue);
         if (rc != 0) {
-               dev_dbg(&h->pdev->dev, "%s Request FAILED.\n", msg);
-               dev_warn(&h->pdev->dev, "FAILED abort on device C%d:B%d:T%d:L%d\n",
-                       h->scsi_host->host_no,
-                       dev->bus, dev->target, dev->lun);
+               hpsa_show_dev_msg(KERN_WARNING, h, dev,
+                                       "FAILED to abort command");
                 cmd_free(h, abort);
                 return FAILED;
         }
@@ -4956,10 +5204,14 @@ static int hpsa_passthru_ioctl(struct ctlr_info *h, void __user *argp)
                 c->SG[0].Len = cpu_to_le32(iocommand.buf_size);
                 c->SG[0].Ext = cpu_to_le32(HPSA_SG_LAST); /* not chaining */
         }
-       hpsa_scsi_do_simple_cmd_core_if_no_lockup(h, c);
+       rc = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
         if (iocommand.buf_size > 0)
                 hpsa_pci_unmap(h->pdev, c, 1, PCI_DMA_BIDIRECTIONAL);
         check_ioctl_unit_attention(h, c);
+       if (rc) {
+               rc = -EIO;
+               goto out;
+       }
  
         /* Copy the error information out */
         memcpy(&iocommand.error_info, c->err_info,
@@ -5086,10 +5338,15 @@ static int hpsa_big_passthru_ioctl(struct ctlr_info *h, void __user *argp)
                 }
                 c->SG[--i].Ext = cpu_to_le32(HPSA_SG_LAST);
         }
-       hpsa_scsi_do_simple_cmd_core_if_no_lockup(h, c);
+       status = hpsa_scsi_do_simple_cmd(h, c, DEFAULT_REPLY_QUEUE, NO_TIMEOUT);
         if (sg_used)
                 hpsa_pci_unmap(h->pdev, c, sg_used, PCI_DMA_BIDIRECTIONAL);
         check_ioctl_unit_attention(h, c);
+       if (status) {
+               status = -EIO;
+               goto cleanup0;
+       }
+
         /* Copy the error information out */
         memcpy(&ioc->error_info, c->err_info, sizeof(ioc->error_info));
         if (copy_to_user(argp, ioc, sizeof(*ioc))) {
@@ -5197,7 +5454,7 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
         int cmd_type)
  {
         int pci_dir = XFER_NONE;
-       struct CommandList *a; /* for commands to be aborted */
+       u64 tag; /* for commands to be aborted */
  
         c->cmd_type = CMD_IOCTL_PEND;
         c->Header.ReplyQueue = 0;
@@ -5313,10 +5570,10 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
                         c->Request.CDB[7] = 0x00;
                         break;
                 case  HPSA_ABORT_MSG:
-                       a = buff;       /* point to command to be aborted */
+                       memcpy(&tag, buff, sizeof(tag));
                         dev_dbg(&h->pdev->dev,
-                               "Abort Tag:0x%016llx request Tag:0x%016llx",
-                               a->Header.tag, c->Header.tag);
+                               "Abort Tag:0x%016llx using rqst Tag:0x%016llx",
+                               tag, c->Header.tag);
                         c->Request.CDBLen = 16;
                         c->Request.type_attr_dir =
                                         TYPE_ATTR_DIR(cmd_type,
@@ -5327,8 +5584,7 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct ctlr_info *h,
                         c->Request.CDB[2] = 0x00; /* reserved */
                         c->Request.CDB[3] = 0x00; /* reserved */
                         /* Tag to abort goes in CDB[4]-CDB[11] */
-                       memcpy(&c->Request.CDB[4], &a->Header.tag,
-                               sizeof(a->Header.tag));
+                       memcpy(&c->Request.CDB[4], &tag, sizeof(tag));
                         c->Request.CDB[12] = 0x00; /* reserved */
                         c->Request.CDB[13] = 0x00; /* reserved */
                         c->Request.CDB[14] = 0x00; /* reserved */
@@ -6227,6 +6483,8 @@ static int hpsa_wait_for_mode_change_ack(struct ctlr_info *h)
          * as we enter this code.)
          */
         for (i = 0; i < MAX_MODE_CHANGE_WAIT; i++) {
+               if (h->remove_in_progress)
+                       goto done;
                 spin_lock_irqsave(&h->lock, flags);
                 doorbell_value = readl(h->vaddr + SA5_DOORBELL);
                 spin_unlock_irqrestore(&h->lock, flags);
@@ -6277,6 +6535,9 @@ static int hpsa_pci_init(struct ctlr_info *h)
         h->product_name = products[prod_index].product_name;
         h->access = *(products[prod_index].access);
  
+       h->needs_abort_tags_swizzled =
+               ctlr_needs_abort_tags_swizzled(h->board_id);
+
         pci_disable_link_state(h->pdev, PCIE_LINK_STATE_L0S |
                                PCIE_LINK_STATE_L1 | PCIE_LINK_STATE_CLKPM);
  
@@ -6622,17 +6883,21 @@ static void fail_all_outstanding_cmds(struct ctlr_info *h)
  {
         int i, refcount;
         struct CommandList *c;
+       int failcount = 0;
  
         flush_workqueue(h->resubmit_wq); /* ensure all cmds are fully built */
         for (i = 0; i < h->nr_cmds; i++) {
                 c = h->cmd_pool + i;
                 refcount = atomic_inc_return(&c->refcount);
                 if (refcount > 1) {
-                       c->err_info->CommandStatus = CMD_HARDWARE_ERR;
+                       c->err_info->CommandStatus = CMD_CTLR_LOCKUP;
                         finish_cmd(c);
+                       failcount++;
                 }
                 cmd_free(h, c);
         }
+       dev_warn(&h->pdev->dev,
+               "failed %d commands in fail_all\n", failcount);
  }
  
  static void set_lockup_detected_for_all_cpus(struct ctlr_info *h, u32 value)
@@ -6658,18 +6923,19 @@ static void controller_lockup_detected(struct ctlr_info *h)
         if (!lockup_detected) {
                 /* no heartbeat, but controller gave us a zero. */
                 dev_warn(&h->pdev->dev,
-                       "lockup detected but scratchpad register is zero\n");
+                       "lockup detected after %d but scratchpad register is zero\n",
+                       h->heartbeat_sample_interval / HZ);
                 lockup_detected = 0xffffffff;
         }
         set_lockup_detected_for_all_cpus(h, lockup_detected);
         spin_unlock_irqrestore(&h->lock, flags);
-       dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x\n",
-                       lockup_detected);
+       dev_warn(&h->pdev->dev, "Controller lockup detected: 0x%08x after %d\n",
+                       lockup_detected, h->heartbeat_sample_interval / HZ);
         pci_disable_device(h->pdev);
         fail_all_outstanding_cmds(h);
  }
  
-static void detect_controller_lockup(struct ctlr_info *h)
+static int detect_controller_lockup(struct ctlr_info *h)
  {
         u64 now;
         u32 heartbeat;
@@ -6679,7 +6945,7 @@ static void detect_controller_lockup(struct ctlr_info *h)
         /* If we've received an interrupt recently, we're ok. */
         if (time_after64(h->last_intr_timestamp +
                                 (h->heartbeat_sample_interval), now))
-               return;
+               return false;
  
         /*
          * If we've already checked the heartbeat recently, we're ok.
@@ -6688,7 +6954,7 @@ static void detect_controller_lockup(struct ctlr_info *h)
          */
         if (time_after64(h->last_heartbeat_timestamp +
                                 (h->heartbeat_sample_interval), now))
-               return;
+               return false;
  
         /* If heartbeat has not changed since we last looked, we're not ok. */
         spin_lock_irqsave(&h->lock, flags);
@@ -6696,12 +6962,13 @@ static void detect_controller_lockup(struct ctlr_info *h)
         spin_unlock_irqrestore(&h->lock, flags);
         if (h->last_heartbeat == heartbeat) {
                 controller_lockup_detected(h);
-               return;
+               return true;
         }
  
         /* We're ok. */
         h->last_heartbeat = heartbeat;
         h->last_heartbeat_timestamp = now;
+       return false;
  }
  
  static void hpsa_ack_ctlr_events(struct ctlr_info *h)
@@ -6890,6 +7157,7 @@ reinit_after_soft_reset:
         spin_lock_init(&h->offline_device_lock);
         spin_lock_init(&h->scan_lock);
         atomic_set(&h->passthru_cmds_avail, HPSA_MAX_CONCURRENT_PASSTHRUS);
+       atomic_set(&h->abort_cmds_available, HPSA_CMDS_RESERVED_FOR_ABORTS);
  
         h->rescan_ctlr_wq = hpsa_create_controller_wq(h, "rescan");
         if (!h->rescan_ctlr_wq) {
@@ -6947,6 +7215,7 @@ reinit_after_soft_reset:
         if (hpsa_allocate_sg_chain_blocks(h))
                 goto clean4;
         init_waitqueue_head(&h->scan_wait_queue);
+       init_waitqueue_head(&h->abort_cmd_wait_queue);
         h->scan_finished = 1; /* no scan currently in progress */
  
         pci_set_drvdata(pdev, h);
@@ -7052,8 +7321,10 @@ static void hpsa_flush_cache(struct ctlr_info *h)
  {
         char *flush_buf;
         struct CommandList *c;
+       int rc;
  
         /* Don't bother trying to flush the cache if locked up */
+       /* FIXME not necessary if do_simple_cmd does the check */
         if (unlikely(lockup_detected(h)))
                 return;
         flush_buf = kzalloc(4, GFP_KERNEL);
@@ -7069,7 +7340,10 @@ static void hpsa_flush_cache(struct ctlr_info *h)
                 RAID_CTLR_LUNID, TYPE_CMD)) {
                 goto out;
         }
-       hpsa_scsi_do_simple_cmd_with_retry(h, c, PCI_DMA_TODEVICE);
+       rc = hpsa_scsi_do_simple_cmd_with_retry(h, c,
+                                       PCI_DMA_TODEVICE, NO_TIMEOUT);
+       if (rc)
+               goto out;
         if (c->err_info->CommandStatus != 0)
  out:
                 dev_warn(&h->pdev->dev,