cxl: Adapter failure handling
Check the AFU state whenever an API is called. The hypervisor may
issue a reset of the adapter when it detects a fault. When it happens,
it launches an error recovery which will either move the AFU to a
permanent failure state, or in the disabled state.
If the AFU is found to be disabled, detach all existing contexts from
it before issuing a AFU reset to re-enable it.
Before detaching contexts, notify any kernel driver through the EEH
callbacks of the AFU pci device.
Co-authored-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
Signed-off-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Reviewed-by: Manoj Kumar <manoj@linux.vnet.ibm.com>
Acked-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b388c97..e345860 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -379,6 +379,8 @@
phys_addr_t p2n_phys;
u64 p2n_size;
int max_ints;
+ struct mutex recovery_lock;
+ int previous_state;
};
struct cxl_afu {
@@ -617,7 +619,7 @@
__be32 software_state;
} __packed;
-static inline bool cxl_adapter_link_ok(struct cxl *cxl)
+static inline bool cxl_adapter_link_ok(struct cxl *cxl, struct cxl_afu *afu)
{
struct pci_dev *pdev;
@@ -636,13 +638,13 @@
static inline void cxl_p1_write(struct cxl *cxl, cxl_p1_reg_t reg, u64 val)
{
- if (likely(cxl_adapter_link_ok(cxl)))
+ if (likely(cxl_adapter_link_ok(cxl, NULL)))
out_be64(_cxl_p1_addr(cxl, reg), val);
}
static inline u64 cxl_p1_read(struct cxl *cxl, cxl_p1_reg_t reg)
{
- if (likely(cxl_adapter_link_ok(cxl)))
+ if (likely(cxl_adapter_link_ok(cxl, NULL)))
return in_be64(_cxl_p1_addr(cxl, reg));
else
return ~0ULL;
@@ -656,13 +658,13 @@
static inline void cxl_p1n_write(struct cxl_afu *afu, cxl_p1n_reg_t reg, u64 val)
{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
+ if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
out_be64(_cxl_p1n_addr(afu, reg), val);
}
static inline u64 cxl_p1n_read(struct cxl_afu *afu, cxl_p1n_reg_t reg)
{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
+ if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
return in_be64(_cxl_p1n_addr(afu, reg));
else
return ~0ULL;
@@ -675,13 +677,13 @@
static inline void cxl_p2n_write(struct cxl_afu *afu, cxl_p2n_reg_t reg, u64 val)
{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
+ if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
out_be64(_cxl_p2n_addr(afu, reg), val);
}
static inline u64 cxl_p2n_read(struct cxl_afu *afu, cxl_p2n_reg_t reg)
{
- if (likely(cxl_adapter_link_ok(afu->adapter)))
+ if (likely(cxl_adapter_link_ok(afu->adapter, afu)))
return in_be64(_cxl_p2n_addr(afu, reg));
else
return ~0ULL;
@@ -857,7 +859,7 @@
u64 wed, u64 amr);
int (*detach_process)(struct cxl_context *ctx);
bool (*support_attributes)(const char *attr_name, enum cxl_attrs type);
- bool (*link_ok)(struct cxl *cxl);
+ bool (*link_ok)(struct cxl *cxl, struct cxl_afu *afu);
void (*release_afu)(struct device *dev);
ssize_t (*afu_read_err_buffer)(struct cxl_afu *afu, char *buf,
loff_t off, size_t count);