MIPS: kdump: Add support

[ralf@linux-mips.org: Original patch by Maxim Uvarov <muvarov@gmail.com>
with plenty of further shining, polishing, debugging and testing by me.]

Signed-off-by: Maxim Uvarov <muvarov@gmail.com>
Cc: linux-mips@linux-mips.org
Cc: kexec@lists.infradead.org
Cc: horms@verge.net.au
Patchwork: https://patchwork.linux-mips.org/patch/1025/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 462f9c4..c97fc03 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2379,6 +2379,29 @@
 	  support.  As of this writing the exact hardware interface is
 	  strongly in flux, so no good recommendation can be made.
 
+config CRASH_DUMP
+	  bool "Kernel crash dumps"
+	  help
+	  Generate crash dump after being started by kexec.
+	  This should be normally only set in special crash dump kernels
+	  which are loaded in the main kernel with kexec-tools into
+	  a specially reserved region and then later executed after
+	  a crash by kdump/kexec. The crash dump kernel must be compiled
+	  to a memory address not used by the main kernel or firmware using
+	  PHYSICAL_START.
+
+config PHYSICAL_START
+	  hex "Physical address where the kernel is loaded"
+	  default "0xffffffff84000000" if 64BIT
+	  default "0x84000000" if 32BIT
+	  depends on CRASH_DUMP
+	  help
+	  This gives the CKSEG0 or KSEG0 address where the kernel is loaded.
+	  If you plan to use kernel for capturing the crash dump change
+	  this value to start of the reserved region (the "X" value as
+	  specified in the "crashkernel=YM@XM" command line boot parameter
+	  passed to the panic-ed kernel).
+
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	depends on PROC_FS
diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h
index 4314892..ee25ebb 100644
--- a/arch/mips/include/asm/kexec.h
+++ b/arch/mips/include/asm/kexec.h
@@ -9,22 +9,43 @@
 #ifndef _MIPS_KEXEC
 # define _MIPS_KEXEC
 
+#include <asm/stacktrace.h>
+
 /* Maximum physical address we can use pages from */
 #define KEXEC_SOURCE_MEMORY_LIMIT (0x20000000)
 /* Maximum address we can reach in physical address mode */
 #define KEXEC_DESTINATION_MEMORY_LIMIT (0x20000000)
  /* Maximum address we can use for the control code buffer */
 #define KEXEC_CONTROL_MEMORY_LIMIT (0x20000000)
-
-#define KEXEC_CONTROL_PAGE_SIZE 4096
+/* Reserve 3*4096 bytes for board-specific info */
+#define KEXEC_CONTROL_PAGE_SIZE (4096 + 3*4096)
 
 /* The native architecture */
 #define KEXEC_ARCH KEXEC_ARCH_MIPS
+#define MAX_NOTE_BYTES 1024
 
 static inline void crash_setup_regs(struct pt_regs *newregs,
 				    struct pt_regs *oldregs)
 {
-	/* Dummy implementation for now */
+	if (oldregs)
+		memcpy(newregs, oldregs, sizeof(*newregs));
+	else
+		prepare_frametrace(newregs);
 }
 
+#ifdef CONFIG_KEXEC
+struct kimage;
+extern unsigned long kexec_args[4];
+extern int (*_machine_kexec_prepare)(struct kimage *);
+extern void (*_machine_kexec_shutdown)(void);
+extern void (*_machine_crash_shutdown)(struct pt_regs *regs);
+extern void default_machine_crash_shutdown(struct pt_regs *regs);
+#ifdef CONFIG_SMP
+extern const unsigned char kexec_smp_wait[];
+extern unsigned long secondary_kexec_args[4];
+extern void (*relocated_kexec_smp_wait) (void *);
+extern atomic_t kexec_ready_to_reboot;
+#endif
+#endif
+
 #endif /* !_MIPS_KEXEC */
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index d4fb4d8..f33b5fd 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -40,6 +40,8 @@
 #define SMP_CALL_FUNCTION	0x2
 /* Octeon - Tell another core to flush its icache */
 #define SMP_ICACHE_FLUSH	0x4
+/* Used by kexec crashdump to save all cpu's state */
+#define SMP_DUMP		0x8
 
 extern volatile cpumask_t cpu_callin_map;
 
@@ -91,4 +93,8 @@
 	mp_ops->send_ipi_mask(mask, SMP_CALL_FUNCTION);
 }
 
+#if defined(CONFIG_KEXEC)
+extern void (*dump_ipi_function_ptr)(void *);
+void dump_send_ipi(void (*dump_ipi_callback)(void *));
+#endif
 #endif /* __ASM_SMP_H */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 8b28bc4..764597b 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -80,7 +80,8 @@
 
 obj-$(CONFIG_GPIO_TXX9)		+= gpio_txx9.o
 
-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
+obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_SPINLOCK_TEST)	+= spinlock_test.o
 obj-$(CONFIG_MIPS_MACHINE)	+= mips_machine.o
diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
new file mode 100644
index 0000000..0f53c39
--- /dev/null
+++ b/arch/mips/kernel/crash.c
@@ -0,0 +1,71 @@
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/bootmem.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+
+/* This keeps a track of which one is crashing cpu. */
+static int crashing_cpu = -1;
+static cpumask_t cpus_in_crash = CPU_MASK_NONE;
+
+#ifdef CONFIG_SMP
+static void crash_shutdown_secondary(void *ignore)
+{
+	struct pt_regs *regs;
+	int cpu = smp_processor_id();
+
+	regs = task_pt_regs(current);
+
+	if (!cpu_online(cpu))
+		return;
+
+	local_irq_disable();
+	if (!cpu_isset(cpu, cpus_in_crash))
+		crash_save_cpu(regs, cpu);
+	cpu_set(cpu, cpus_in_crash);
+
+	while (!atomic_read(&kexec_ready_to_reboot))
+		cpu_relax();
+	relocated_kexec_smp_wait(NULL);
+	/* NOTREACHED */
+}
+
+static void crash_kexec_prepare_cpus(void)
+{
+	unsigned int msecs;
+
+	unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+
+	dump_send_ipi(crash_shutdown_secondary);
+	smp_wmb();
+
+	/*
+	 * The crash CPU sends an IPI and wait for other CPUs to
+	 * respond. Delay of at least 10 seconds.
+	 */
+	pr_emerg("Sending IPI to other cpus...\n");
+	msecs = 10000;
+	while ((cpus_weight(cpus_in_crash) < ncpus) && (--msecs > 0)) {
+		cpu_relax();
+		mdelay(1);
+	}
+}
+
+#else /* !defined(CONFIG_SMP)  */
+static void crash_kexec_prepare_cpus(void) {}
+#endif /* !defined(CONFIG_SMP)  */
+
+void default_machine_crash_shutdown(struct pt_regs *regs)
+{
+	local_irq_disable();
+	crashing_cpu = smp_processor_id();
+	crash_save_cpu(regs, crashing_cpu);
+	crash_kexec_prepare_cpus();
+	cpu_set(crashing_cpu, cpus_in_crash);
+}
diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c
new file mode 100644
index 0000000..d9ec389
--- /dev/null
+++ b/arch/mips/kernel/crash_dump.c
@@ -0,0 +1,77 @@
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
+#include <linux/crash_dump.h>
+#include <asm/uaccess.h>
+
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
+static int __init parse_savemaxmem(char *p)
+{
+	if (p)
+		saved_max_pfn = (memparse(p, &p) >> PAGE_SHIFT) - 1;
+
+	return 1;
+}
+__setup("savemaxmem=", parse_savemaxmem);
+
+
+static void *kdump_buf_page;
+
+/**
+ * copy_oldmem_page - copy one page from "oldmem"
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ *	space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ *	otherwise @buf is in kernel address space, use memcpy().
+ *
+ * Copy a page from "oldmem". For this page, there is no pte mapped
+ * in the current kernel.
+ *
+ * Calling copy_to_user() in atomic context is not desirable. Hence first
+ * copying the data to a pre-allocated kernel page and then copying to user
+ * space in non-atomic context.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+			 size_t csize, unsigned long offset, int userbuf)
+{
+	void  *vaddr;
+
+	if (!csize)
+		return 0;
+
+	vaddr = kmap_atomic_pfn(pfn);
+
+	if (!userbuf) {
+		memcpy(buf, (vaddr + offset), csize);
+		kunmap_atomic(vaddr);
+	} else {
+		if (!kdump_buf_page) {
+			pr_warning("Kdump: Kdump buffer page not allocated\n");
+
+			return -EFAULT;
+		}
+		copy_page(kdump_buf_page, vaddr);
+		kunmap_atomic(vaddr);
+		if (copy_to_user(buf, (kdump_buf_page + offset), csize))
+			return -EFAULT;
+	}
+
+	return csize;
+}
+
+static int __init kdump_buf_page_init(void)
+{
+	int ret = 0;
+
+	kdump_buf_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!kdump_buf_page) {
+		pr_warning("Kdump: Failed to allocate kdump buffer page\n");
+		ret = -ENOMEM;
+	}
+
+	return ret;
+}
+arch_initcall(kdump_buf_page_init);
diff --git a/arch/mips/kernel/machine_kexec.c b/arch/mips/kernel/machine_kexec.c
index 85beb9b..992e184 100644
--- a/arch/mips/kernel/machine_kexec.c
+++ b/arch/mips/kernel/machine_kexec.c
@@ -5,7 +5,7 @@
  * This source code is licensed under the GNU General Public License,
  * Version 2.  See the file COPYING for more details.
  */
-
+#include <linux/compiler.h>
 #include <linux/kexec.h>
 #include <linux/mm.h>
 #include <linux/delay.h>
@@ -19,9 +19,19 @@
 extern unsigned long kexec_start_address;
 extern unsigned long kexec_indirection_page;
 
+int (*_machine_kexec_prepare)(struct kimage *) = NULL;
+void (*_machine_kexec_shutdown)(void) = NULL;
+void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
+#ifdef CONFIG_SMP
+void (*relocated_kexec_smp_wait) (void *);
+atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
+#endif
+
 int
 machine_kexec_prepare(struct kimage *kimage)
 {
+	if (_machine_kexec_prepare)
+		return _machine_kexec_prepare(kimage);
 	return 0;
 }
 
@@ -33,14 +43,20 @@
 void
 machine_shutdown(void)
 {
+	if (_machine_kexec_shutdown)
+		_machine_kexec_shutdown();
 }
 
 void
 machine_crash_shutdown(struct pt_regs *regs)
 {
+	if (_machine_crash_shutdown)
+		_machine_crash_shutdown(regs);
+	else
+		default_machine_crash_shutdown(regs);
 }
 
-typedef void (*noretfun_t)(void) __attribute__((noreturn));
+typedef void (*noretfun_t)(void) __noreturn;
 
 void
 machine_kexec(struct kimage *image)
@@ -52,7 +68,9 @@
 	reboot_code_buffer =
 	  (unsigned long)page_address(image->control_code_page);
 
-	kexec_start_address = image->start;
+	kexec_start_address =
+		(unsigned long) phys_to_virt(image->start);
+
 	kexec_indirection_page =
 		(unsigned long) phys_to_virt(image->head & PAGE_MASK);
 
@@ -63,7 +81,7 @@
 	 * The generic kexec code builds a page list with physical
 	 * addresses. they are directly accessible through KSEG0 (or
 	 * CKSEG0 or XPHYS if on 64bit system), hence the
-	 * pys_to_virt() call.
+	 * phys_to_virt() call.
 	 */
 	for (ptr = &image->head; (entry = *ptr) && !(entry &IND_DONE);
 	     ptr = (entry & IND_INDIRECTION) ?
@@ -81,5 +99,12 @@
 	printk("Will call new kernel at %08lx\n", image->start);
 	printk("Bye ...\n");
 	__flush_cache_all();
+#ifdef CONFIG_SMP
+	/* All secondary cpus now may jump to kexec_wait cycle */
+	relocated_kexec_smp_wait = reboot_code_buffer +
+		(void *)(kexec_smp_wait - relocate_new_kernel);
+	smp_wmb();
+	atomic_set(&kexec_ready_to_reboot, 1);
+#endif
 	((noretfun_t) reboot_code_buffer)();
 }
diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S
index 87481f9..0b10858 100644
--- a/arch/mips/kernel/relocate_kernel.S
+++ b/arch/mips/kernel/relocate_kernel.S
@@ -15,6 +15,11 @@
 #include <asm/addrspace.h>
 
 LEAF(relocate_new_kernel)
+	PTR_L a0,	arg0
+	PTR_L a1,	arg1
+	PTR_L a2,	arg2
+	PTR_L a3,	arg3
+
 	PTR_L		s0, kexec_indirection_page
 	PTR_L		s1, kexec_start_address
 
@@ -26,7 +31,6 @@
 	and		s3, s2, 0x1
 	beq		s3, zero, 1f
 	and		s4, s2, ~0x1	/* store destination addr in s4 */
-	move		a0, s4
 	b		process_entry
 
 1:
@@ -60,10 +64,92 @@
 	b		process_entry
 
 done:
+#ifdef CONFIG_SMP
+	/* kexec_flag reset is signal to other CPUs what kernel
+	   was moved to it's location. Note - we need relocated address
+	   of kexec_flag.  */
+
+	bal		1f
+ 1: 	move		t1,ra;
+	PTR_LA		t2,1b
+	PTR_LA		t0,kexec_flag
+	PTR_SUB		t0,t0,t2;
+	PTR_ADD		t0,t1,t0;
+	LONG_S		zero,(t0)
+#endif
+
+	sync
 	/* jump to kexec_start_address */
 	j		s1
 	END(relocate_new_kernel)
 
+#ifdef CONFIG_SMP
+/*
+ * Other CPUs should wait until code is relocated and
+ * then start at entry (?) point.
+ */
+LEAF(kexec_smp_wait)
+	PTR_L		a0, s_arg0
+	PTR_L		a1, s_arg1
+	PTR_L		a2, s_arg2
+	PTR_L		a3, s_arg3
+	PTR_L		s1, kexec_start_address
+
+	/* Non-relocated address works for args and kexec_start_address ( old
+	 * kernel is not overwritten). But we need relocated address of
+	 * kexec_flag.
+	 */
+
+	bal		1f
+1:	move		t1,ra;
+	PTR_LA		t2,1b
+	PTR_LA		t0,kexec_flag
+	PTR_SUB		t0,t0,t2;
+	PTR_ADD		t0,t1,t0;
+
+1:	LONG_L		s0, (t0)
+	bne		s0, zero,1b
+
+	sync
+	j		s1
+	END(kexec_smp_wait)
+#endif
+
+#ifdef __mips64
+       /* all PTR's must be aligned to 8 byte in 64-bit mode */
+       .align  3
+#endif
+
+/* All parameters to new kernel are passed in registers a0-a3.
+ * kexec_args[0..3] are uses to prepare register values.
+ */
+
+kexec_args:
+	EXPORT(kexec_args)
+arg0:	PTR		0x0
+arg1:	PTR		0x0
+arg2:	PTR		0x0
+arg3:	PTR		0x0
+	.size	kexec_args,PTRSIZE*4
+
+#ifdef CONFIG_SMP
+/*
+ * Secondary CPUs may have different kernel parameters in
+ * their registers a0-a3. secondary_kexec_args[0..3] are used
+ * to prepare register values.
+ */
+secondary_kexec_args:
+	EXPORT(secondary_kexec_args)
+s_arg0:	PTR		0x0
+s_arg1:	PTR		0x0
+s_arg2:	PTR		0x0
+s_arg3:	PTR		0x0
+	.size	secondary_kexec_args,PTRSIZE*4
+kexec_flag:
+	LONG		0x1
+
+#endif
+
 kexec_start_address:
 	EXPORT(kexec_start_address)
 	PTR		0x0
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 290dc6a..8c41187 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -22,6 +22,7 @@
 #include <linux/console.h>
 #include <linux/pfn.h>
 #include <linux/debugfs.h>
+#include <linux/kexec.h>
 
 #include <asm/addrspace.h>
 #include <asm/bootinfo.h>
@@ -536,12 +537,64 @@
 	}
 
 	bootmem_init();
+#ifdef CONFIG_KEXEC
+	if (crashk_res.start != crashk_res.end)
+		reserve_bootmem(crashk_res.start,
+				crashk_res.end - crashk_res.start + 1,
+				BOOTMEM_DEFAULT);
+#endif
 	device_tree_init();
 	sparse_init();
 	plat_swiotlb_setup();
 	paging_init();
 }
 
+#ifdef CONFIG_KEXEC
+static inline unsigned long long get_total_mem(void)
+{
+	unsigned long long total;
+
+	total = max_pfn - min_low_pfn;
+	return total << PAGE_SHIFT;
+}
+
+static void __init mips_parse_crashkernel(void)
+{
+	unsigned long long total_mem;
+	unsigned long long crash_size, crash_base;
+	int ret;
+
+	total_mem = get_total_mem();
+	ret = parse_crashkernel(boot_command_line, total_mem,
+				&crash_size, &crash_base);
+	if (ret != 0 || crash_size <= 0)
+		return;
+
+	crashk_res.start = crash_base;
+	crashk_res.end   = crash_base + crash_size - 1;
+}
+
+static void __init request_crashkernel(struct resource *res)
+{
+	int ret;
+
+	ret = request_resource(res, &crashk_res);
+	if (!ret)
+		pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
+			(unsigned long)((crashk_res.end -
+				crashk_res.start + 1) >> 20),
+			(unsigned long)(crashk_res.start  >> 20));
+}
+#else /* !defined(CONFIG_KEXEC)  */
+static void __init mips_parse_crashkernel(void)
+{
+}
+
+static void __init request_crashkernel(struct resource *res)
+{
+}
+#endif /* !defined(CONFIG_KEXEC)  */
+
 static void __init resource_init(void)
 {
 	int i;
@@ -557,6 +610,8 @@
 	/*
 	 * Request address space for all standard RAM.
 	 */
+	mips_parse_crashkernel();
+
 	for (i = 0; i < boot_mem_map.nr_map; i++) {
 		struct resource *res;
 		unsigned long start, end;
@@ -593,6 +648,7 @@
 		 */
 		request_resource(res, &code_resource);
 		request_resource(res, &data_resource);
+		request_crashkernel(res);
 	}
 }
 
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 9005bf9..2e6374a 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -386,3 +386,20 @@
 
 EXPORT_SYMBOL(flush_tlb_page);
 EXPORT_SYMBOL(flush_tlb_one);
+
+#if defined(CONFIG_KEXEC)
+void (*dump_ipi_function_ptr)(void *) = NULL;
+void dump_send_ipi(void (*dump_ipi_callback)(void *))
+{
+	int i;
+	int cpu = smp_processor_id();
+
+	dump_ipi_function_ptr = dump_ipi_callback;
+	smp_mb();
+	for_each_online_cpu(i)
+		if (i != cpu)
+			mp_ops->send_ipi_single(i, SMP_DUMP);
+
+}
+EXPORT_SYMBOL(dump_send_ipi);
+#endif
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 9be3df1..da0c294 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -13,6 +13,7 @@
  */
 #include <linux/bug.h>
 #include <linux/compiler.h>
+#include <linux/kexec.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -409,6 +410,9 @@
 		panic("Fatal exception");
 	}
 
+	if (regs && kexec_should_crash(current))
+		crash_kexec(regs);
+
 	do_exit(sig);
 }