drm/nouveau/mmu/gp100-: support vmms with gcc/tex replayable faults enabled

Some GPU units are capable of supporting "replayable" page faults, where
the execution unit will wait for SW to fixup GPU page tables rather than
triggering a channel-fatal fault.

This feature isn't useful (it's harmful, even) unless something like HMM
is being used to manage events appearing in the replayable fault buffer,
so, it's disabled by default.

This commit allows a client to request it be enabled.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
diff --git a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h
index 33ff6c7..4cabd61 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h
@@ -6,6 +6,12 @@ struct gp100_vmm_vn {
 	/* nvif_vmm_vX ... */
 };
 
+struct gp100_vmm_v0 {
+	/* nvif_vmm_vX ... */
+	__u8  version;
+	__u8  fault_replay;
+};
+
 struct gp100_vmm_map_vn {
 	/* nvif_vmm_map_vX ... */
 };
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 8b117cb..28ade86 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -45,6 +45,8 @@ struct nvkm_vmm {
 
 	dma_addr_t null;
 	void *nullp;
+
+	bool replay;
 };
 
 int nvkm_vmm_new(struct nvkm_device *, u64 addr, u64 size, void *argv, u32 argc,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c
index 651b880..65cb9d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c
@@ -31,7 +31,7 @@ gp100_mmu = {
 	.dma_bits = 47,
 	.mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}},
 	.mem = {{ -1,  0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map },
-	.vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp100_vmm_new },
+	.vmm = {{ -1,  0, NVIF_CLASS_VMM_GP100}, gp100_vmm_new },
 	.kind = gm200_mmu_kind,
 	.kind_sys = true,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c
index 3bd3db3..0a50be9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c
@@ -31,7 +31,7 @@ gp10b_mmu = {
 	.dma_bits = 47,
 	.mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}},
 	.mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map },
-	.vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp10b_vmm_new },
+	.vmm = {{ -1,  0, NVIF_CLASS_VMM_GP100}, gp10b_vmm_new },
 	.kind = gm200_mmu_kind,
 	.kind_sys = true,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c
index f666cb5..e0997ee 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c
@@ -31,7 +31,7 @@ gv100_mmu = {
 	.dma_bits = 47,
 	.mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}},
 	.mem = {{ -1,  0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map },
-	.vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gv100_vmm_new },
+	.vmm = {{ -1,  0, NVIF_CLASS_VMM_GP100}, gv100_vmm_new },
 	.kind = gm200_mmu_kind,
 	.kind_sys = true,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index e85f19f..5e55ecb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -220,6 +220,9 @@ int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *,
 int gm200_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base);
 int gm200_vmm_join(struct nvkm_vmm *, struct nvkm_memory *);
 
+int gp100_vmm_new_(const struct nvkm_vmm_func *,
+		   struct nvkm_mmu *, bool, u64, u64, void *, u32,
+		   struct lock_class_key *, const char *, struct nvkm_vmm **);
 int gp100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *);
 int gp100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *);
 void gp100_vmm_flush(struct nvkm_vmm *, int);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
index af427da..b4f5197 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
@@ -476,7 +476,11 @@ gp100_vmm_flush(struct nvkm_vmm *vmm, int depth)
 int
 gp100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
 {
-	const u64 base = BIT_ULL(10) /* VER2 */ | BIT_ULL(11); /* 64KiB */
+	u64 base = BIT_ULL(10) /* VER2 */ | BIT_ULL(11) /* 64KiB */;
+	if (vmm->replay) {
+		base |= BIT_ULL(4); /* FAULT_REPLAY_TEX */
+		base |= BIT_ULL(5); /* FAULT_REPLAY_GCC */
+	}
 	return gf100_vmm_join_(vmm, inst, base);
 }
 
@@ -501,10 +505,39 @@ gp100_vmm = {
 };
 
 int
+gp100_vmm_new_(const struct nvkm_vmm_func *func,
+	       struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size,
+	       void *argv, u32 argc, struct lock_class_key *key,
+	       const char *name, struct nvkm_vmm **pvmm)
+{
+	union {
+		struct gp100_vmm_vn vn;
+		struct gp100_vmm_v0 v0;
+	} *args = argv;
+	int ret = -ENOSYS;
+	bool replay;
+
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
+		replay = args->v0.fault_replay != 0;
+	} else
+	if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) {
+		replay = false;
+	} else
+		return ret;
+
+	ret = nvkm_vmm_new_(func, mmu, 0, managed, addr, size, key, name, pvmm);
+	if (ret)
+		return ret;
+
+	(*pvmm)->replay = replay;
+	return 0;
+}
+
+int
 gp100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size,
 	      void *argv, u32 argc, struct lock_class_key *key,
 	      const char *name, struct nvkm_vmm **pvmm)
 {
-	return nv04_vmm_new_(&gp100_vmm, mmu, 0, managed, addr, size,
-			     argv, argc, key, name, pvmm);
+	return gp100_vmm_new_(&gp100_vmm, mmu, managed, addr, size,
+			      argv, argc, key, name, pvmm);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
index 317a83e..e081239a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
@@ -46,6 +46,6 @@ gp10b_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size,
 	      void *argv, u32 argc, struct lock_class_key *key,
 	      const char *name, struct nvkm_vmm **pvmm)
 {
-	return nv04_vmm_new_(&gp10b_vmm, mmu, 0, managed, addr, size,
-			     argv, argc, key, name, pvmm);
+	return gp100_vmm_new_(&gp10b_vmm, mmu, managed, addr, size,
+			      argv, argc, key, name, pvmm);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c
index 172d685..f0e21f6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c
@@ -84,6 +84,6 @@ gv100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size,
 	      void *argv, u32 argc, struct lock_class_key *key,
 	      const char *name, struct nvkm_vmm **pvmm)
 {
-	return nv04_vmm_new_(&gv100_vmm, mmu, 0, managed, addr, size,
-			     argv, argc, key, name, pvmm);
+	return gp100_vmm_new_(&gv100_vmm, mmu, managed, addr, size,
+			      argv, argc, key, name, pvmm);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
index 3bfef14..be91cff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
@@ -73,6 +73,6 @@ tu102_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size,
 	      void *argv, u32 argc, struct lock_class_key *key,
 	      const char *name, struct nvkm_vmm **pvmm)
 {
-	return nv04_vmm_new_(&tu102_vmm, mmu, 0, managed, addr, size,
-			     argv, argc, key, name, pvmm);
+	return gp100_vmm_new_(&tu102_vmm, mmu, managed, addr, size,
+			      argv, argc, key, name, pvmm);
 }