IB/mthca: Recover from catastrophic errors
Trigger device remove and then add when a catastrophic error is
detected in hardware. This, in turn, will cause a device reset, which
we hope will recover from the catastrophic condition.
Since this might interefere with debugging the root cause, add a
module option to suppress this behaviour.
Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 33bd0b8..fe5cecf 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -45,6 +45,7 @@
#include <linux/dma-mapping.h>
#include <linux/timer.h>
#include <linux/mutex.h>
+#include <linux/list.h>
#include <asm/semaphore.h>
@@ -283,8 +284,11 @@
unsigned long stop;
u32 size;
struct timer_list timer;
+ struct list_head list;
};
+extern struct mutex mthca_device_mutex;
+
struct mthca_dev {
struct ib_device ib_dev;
struct pci_dev *pdev;
@@ -450,6 +454,9 @@
void mthca_start_catas_poll(struct mthca_dev *dev);
void mthca_stop_catas_poll(struct mthca_dev *dev);
+int __mthca_restart_one(struct pci_dev *pdev);
+int mthca_catas_init(void);
+void mthca_catas_cleanup(void);
int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);